You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

756 lines
20 KiB

  1. <?php
  2. /**
  3. * Hoa
  4. *
  5. *
  6. * @license
  7. *
  8. * New BSD License
  9. *
  10. * Copyright © 2007-2016, Hoa community. All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or without
  13. * modification, are permitted provided that the following conditions are met:
  14. * * Redistributions of source code must retain the above copyright
  15. * notice, this list of conditions and the following disclaimer.
  16. * * Redistributions in binary form must reproduce the above copyright
  17. * notice, this list of conditions and the following disclaimer in the
  18. * documentation and/or other materials provided with the distribution.
  19. * * Neither the name of the Hoa nor the names of its contributors may be
  20. * used to endorse or promote products derived from this software without
  21. * specific prior written permission.
  22. *
  23. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  24. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  25. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
  27. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  28. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  29. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  30. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  31. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  32. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33. * POSSIBILITY OF SUCH DAMAGE.
  34. */
  35. namespace Hoa\Compiler\Llk;
  36. use Hoa\Compiler;
  37. /**
  38. * Class \Hoa\Compiler\Llk\Parser.
  39. *
  40. * PP parser.
  41. *
  42. * @copyright Copyright © 2007-2016 Hoa community
  43. * @license New BSD License
  44. */
  45. class Parser
  46. {
  /**
   * List of skipped tokens.
   * Declared for subclasses; this class neither reads nor writes it.
   *
   * @var array|null
   */
  protected $_skip          = null;

  /**
   * Token definitions, in precedence order. The parser reads them as
   * namespace => (token name => token regex); a token name may carry a
   * `:suffix`, which is ignored when looking a token up by name.
   *
   * @var array|null
   */
  protected $_tokens        = null;

  /**
   * Rules, as an associative array: rule name => Rule object.
   * Rule names may be strings or integers.
   *
   * @var array|null
   */
  protected $_rules         = null;

  /**
   * Index, in the token sequence, of the token currently analyzed.
   *
   * @var int
   */
  protected $_currentState  = 0;

  /**
   * Index, in the token sequence, of the token reported when the analysis
   * fails. It is moved right after every token that gets matched.
   *
   * @var int
   */
  protected $_errorState    = 0;

  /**
   * Token sequence being analyzed, as produced by the lexer.
   *
   * @var array
   */
  protected $_tokenSequence = [];

  /**
   * Trace of activated rules (entries, exits and matched tokens).
   *
   * @var array
   */
  protected $_trace         = [];

  /**
   * Stack of rules that remain to be unfolded.
   *
   * @var array|null
   */
  protected $_todo          = null;

  /**
   * AST produced by the last successful tree-building parse.
   *
   * @var \Hoa\Compiler\Llk\TreeNode|null
   */
  protected $_tree          = null;

  /**
   * Current depth while building the trace; -1 means that no
   * non-transitional rule is open.
   *
   * @var int
   */
  protected $_depth         = -1;
  /**
   * Build a parser from its token definitions and its rules.
   *
   * @param   array  $tokens    Token definitions.
   * @param   array  $rules     Rules, indexed by name.
   */
  public function __construct(array $tokens = [], array $rules = [])
  {
      $this->_rules  = $rules;
      $this->_tokens = $tokens;
  }
  /**
   * Parse a text.
   *
   * The text is lexed with the parser tokens, then the rules are unfolded
   * from the axiom until the whole token sequence is consumed (the current
   * token is `EOF`). Whenever unfolding gets stuck, the trace is
   * backtracked; when no alternative is left, an exception describing the
   * offending token is thrown.
   *
   * @param   string  $text    Text to parse.
   * @param   string  $rule    The axiom, i.e. root rule. When null or
   *                           unknown, the root rule of the grammar is used.
   * @param   bool    $tree    Whether build tree or not.
   * @return  mixed            `true` when `$tree` is `false`, the AST
   *                           otherwise.
   * @throws  \Hoa\Compiler\Exception\UnexpectedToken
   * @throws  \Hoa\Compiler\Exception    When the AST cannot be built from
   *                                     the trace.
   */
  public function parse($text, $rule = null, $tree = true)
  {
      $lexer                = new Lexer();
      $this->_tokenSequence = $lexer->lexMe($text, $this->_tokens);
      $this->_currentState  = 0;
      $this->_errorState    = 0;
      $this->_trace         = [];
      $this->_todo          = [];
      // A previous parse aborted by an exception may have left a stale
      // depth behind: start again from the initial value.
      $this->_depth         = -1;

      if (null === $rule ||
          false === array_key_exists($rule, $this->_rules)) {
          $rule = $this->getRootRule();
      }

      $closeRule   = new Rule\Ekzit($rule, 0);
      $openRule    = new Rule\Entry($rule, 0, [$closeRule]);
      $this->_todo = [$closeRule, $openRule];

      do {
          $out = $this->unfold();

          if (null !== $out &&
              'EOF' === $this->getCurrentToken()) {
              break;
          }

          if (false === $this->backtrack()) {
              $token = $this->_tokenSequence[$this->_errorState];

              list($line, $column, $excerpt) = self::_getErrorLocation(
                  $text,
                  $token['offset']
              );

              throw new Compiler\Exception\UnexpectedToken(
                  'Unexpected token "%s" (%s) at line %d and column %d:' .
                  "\n" . '%s' . "\n" . str_repeat(' ', $column - 1) . '↑',
                  0,
                  [
                      $token['value'],
                      $token['token'],
                      $line,
                      $column,
                      $excerpt
                  ],
                  $line,
                  $column
              );
          }
      } while (true);

      if (false === $tree) {
          return true;
      }

      $ast = $this->_buildTree();

      if (!($ast instanceof TreeNode)) {
          throw new Compiler\Exception(
              'Parsing error: cannot build AST, the trace is corrupted.',
              1
          );
      }

      return $this->_tree = $ast;
  }

  /**
   * Compute the line, the column (both 1-based, in bytes) and the line
   * excerpt matching a byte offset in a text.
   *
   * @param   string  $text      Text being parsed.
   * @param   int     $offset    Byte offset of the offending token.
   * @return  array              Line, column and excerpt, in this order.
   */
  private static function _getErrorLocation($text, $offset)
  {
      $text   = (string) $text;
      $length = strlen($text);

      if (0 === $length) {
          return [1, 1, $text];
      }

      $offset = max(0, min((int) $offset, $length));

      // Last new line located strictly before the offset, if any. Keeping
      // `false` distinct from `0` matters: a new line may be the very first
      // byte of the text.
      $previousNl = 0 === $offset
          ? false
          : strrpos($text, "\n", $offset - $length - 1);
      $lineStart  = false === $previousNl ? 0 : $previousNl + 1;

      // The excerpt runs up to the next new line, or up to the end of the
      // text when the error is located on the last line.
      $lineEnd = strpos($text, "\n", $offset);

      if (false === $lineEnd) {
          $lineEnd = $length;
      }

      // `substr_count` rejects an empty length before PHP 7.1.
      $line = 0 === $lineStart
          ? 1
          : substr_count($text, "\n", 0, $lineStart) + 1;

      return [
          $line,
          $offset - $lineStart + 1,
          rtrim((string) substr($text, $lineStart, $lineEnd - $lineStart), "\r")
      ];
  }
  /**
   * Unfold the todo stack until it is empty.
   *
   * Exit rules are moved to the trace; every other pending rule is handed
   * to self::_parse(), and a failure triggers a backtrack.
   *
   * @return  mixed    `true` when the stack has been emptied, `null` when a
   *                   rule failed and no backtrack was possible.
   */
  protected function unfold()
  {
      while (0 < count($this->_todo)) {
          $pending = array_pop($this->_todo);

          if (!($pending instanceof Rule\Ekzit)) {
              $definition = $this->_rules[$pending->getRule()];
              $matched    = $this->_parse($definition, $pending->getData());

              if (false === $matched && false === $this->backtrack()) {
                  return null;
              }

              continue;
          }

          // Closing a rule: record it, then leave its depth level.
          $pending->setDepth($this->_depth);
          $this->_trace[] = $pending;

          if (false === $pending->isTransitional()) {
              --$this->_depth;
          }
      }

      return true;
  }
  /**
   * Parse current rule.
   *
   * Dispatches on the kind of rule: token, concatenation, choice or
   * repetition. Any other kind of rule fails.
   *
   * @param   \Hoa\Compiler\Llk\Rule  $zeRule    Current rule.
   * @param   int                     $next      Next rule index: the
   *                                             alternative to try for a
   *                                             choice, the iteration to
   *                                             try for a repetition.
   * @return  bool
   */
  protected function _parse(Rule $zeRule, $next)
  {
      if ($zeRule instanceof Rule\Token) {
          return $this->_parseToken($zeRule);
      }

      if ($zeRule instanceof Rule\Concatenation) {
          return $this->_parseConcatenation($zeRule);
      }

      if ($zeRule instanceof Rule\Choice) {
          return $this->_parseChoice($zeRule, $next);
      }

      if ($zeRule instanceof Rule\Repetition) {
          return $this->_parseRepetition($zeRule, $next);
      }

      return false;
  }

  /**
   * Match the current token against a token rule and, on success, record
   * a valued copy of the rule in the trace and move to the next token.
   *
   * @param   \Hoa\Compiler\Llk\Rule\Token  $zeRule    Token rule.
   * @return  bool
   */
  private function _parseToken(Rule\Token $zeRule)
  {
      $name = $this->getCurrentToken();

      if ($zeRule->getTokenName() !== $name) {
          return false;
      }

      $value       = $this->getCurrentToken('value');
      $unification = $zeRule->getUnificationIndex();

      if (0 <= $unification) {
          // Walk the trace backwards, up to the entry of the enclosing
          // non-transitional rule. A token sharing the same unification
          // index must carry the same value. `$skip` is positive while
          // walking through rules that are closed and deeper than the
          // current one: their tokens are ignored.
          for ($skip = 0, $i = count($this->_trace) - 1; $i >= 0; --$i) {
              $trace = $this->_trace[$i];

              if ($trace instanceof Rule\Entry) {
                  if (false === $trace->isTransitional()) {
                      if ($trace->getDepth() <= $this->_depth) {
                          break;
                      }

                      --$skip;
                  }
              } elseif ($trace instanceof Rule\Ekzit &&
                        false === $trace->isTransitional()) {
                  $skip += (int) ($trace->getDepth() > $this->_depth);
              }

              if (0 < $skip) {
                  continue;
              }

              if ($trace instanceof Rule\Token &&
                  $unification === $trace->getUnificationIndex() &&
                  $value       !== $trace->getValue()) {
                  return false;
              }
          }
      }

      $namespace = $this->getCurrentToken('namespace');
      $zzeRule   = clone $zeRule;
      $zzeRule->setValue($value);
      $zzeRule->setNamespace($namespace);

      if (isset($this->_tokens[$namespace][$name])) {
          $representation = $this->_tokens[$namespace][$name];
      } else {
          // The token may be declared as `name:suffix`. Only a real match
          // provides the representation: never fall back on an unrelated
          // regex when nothing matches.
          $representation = null;

          foreach ($this->_tokens[$namespace] as $_name => $regex) {
              $pos = strpos($_name, ':');

              if (false !== $pos && substr($_name, 0, $pos) === $name) {
                  $representation = $regex;

                  break;
              }
          }
      }

      $zzeRule->setRepresentation($representation);

      // Drop the pending exit of this token rule: a matched token is
      // stored directly in the trace.
      array_pop($this->_todo);
      $this->_trace[]    = $zzeRule;
      $this->_errorState = ++$this->_currentState;

      return true;
  }

  /**
   * Open a concatenation: record its entry and schedule all its operands,
   * so that the first one is unfolded first.
   *
   * @param   \Hoa\Compiler\Llk\Rule\Concatenation  $zeRule    Rule.
   * @return  bool
   */
  private function _parseConcatenation(Rule\Concatenation $zeRule)
  {
      if (false === $zeRule->isTransitional()) {
          ++$this->_depth;
      }

      $this->_trace[] = new Rule\Entry(
          $zeRule->getName(),
          0,
          null,
          $this->_depth
      );
      $content = $zeRule->getContent();

      // The todo list is a stack: push the operands in reverse order.
      for ($i = count($content) - 1; $i >= 0; --$i) {
          $nextRule      = $content[$i];
          $this->_todo[] = new Rule\Ekzit($nextRule, 0);
          $this->_todo[] = new Rule\Entry($nextRule, 0);
      }

      return true;
  }

  /**
   * Open the alternative `$next` of a choice. The entry stored in the
   * trace keeps a copy of the todo stack, so that self::backtrack() is
   * able to try the following alternative.
   *
   * @param   \Hoa\Compiler\Llk\Rule\Choice  $zeRule    Rule.
   * @param   int                            $next      Alternative index.
   * @return  bool
   */
  private function _parseChoice(Rule\Choice $zeRule, $next)
  {
      $content = $zeRule->getContent();

      if ($next >= count($content)) {
          return false;
      }

      if (false === $zeRule->isTransitional()) {
          ++$this->_depth;
      }

      $this->_trace[] = new Rule\Entry(
          $zeRule->getName(),
          $next,
          $this->_todo,
          $this->_depth
      );
      $nextRule      = $content[$next];
      $this->_todo[] = new Rule\Ekzit($nextRule, 0);
      $this->_todo[] = new Rule\Entry($nextRule, 0);

      return true;
  }

  /**
   * Open a repetition (`$next` is 0) by scheduling its minimum number of
   * iterations, or schedule one more iteration (`$next` is greater than
   * 0) unless the maximum is exceeded.
   *
   * @param   \Hoa\Compiler\Llk\Rule\Repetition  $zeRule    Rule.
   * @param   int                                $next      Iteration.
   * @return  bool
   */
  private function _parseRepetition(Rule\Repetition $zeRule, $next)
  {
      $nextRule = $zeRule->getContent();

      if (0 === $next) {
          $name = $zeRule->getName();
          $min  = $zeRule->getMin();

          if (false === $zeRule->isTransitional()) {
              ++$this->_depth;
          }

          $this->_trace[] = new Rule\Entry(
              $name,
              $min,
              null,
              $this->_depth
          );

          // Replace the pending exit of the repetition by one that
          // carries the iteration count and a copy of the todo stack,
          // both needed by self::backtrack().
          array_pop($this->_todo);
          $this->_todo[] = new Rule\Ekzit(
              $name,
              $min,
              $this->_todo
          );

          for ($i = 0; $i < $min; ++$i) {
              $this->_todo[] = new Rule\Ekzit($nextRule, 0);
              $this->_todo[] = new Rule\Entry($nextRule, 0);
          }

          return true;
      }

      $max = $zeRule->getMax();

      // A maximum of -1 means unbounded.
      if (-1 != $max && $next > $max) {
          return false;
      }

      $this->_todo[] = new Rule\Ekzit(
          $zeRule->getName(),
          $next,
          $this->_todo
      );
      $this->_todo[] = new Rule\Ekzit($nextRule, 0);
      $this->_todo[] = new Rule\Entry($nextRule, 0);

      return true;
  }
  /**
   * Backtrack the trace.
   *
   * The trace is unwound, one item at a time, until the entry of a choice
   * or the exit of a repetition is reached; every token removed on the way
   * moves the current state one token back. The rule found is then
   * scheduled again with its next index, from the todo stack and the depth
   * it has saved.
   *
   * @return  bool    `false` when the trace holds no such item.
   */
  protected function backtrack()
  {
      $resumable = false;

      do {
          $last = array_pop($this->_trace);

          if ($last instanceof Rule\Entry) {
              $resumable =
                  $this->_rules[$last->getRule()] instanceof Rule\Choice;
          } elseif ($last instanceof Rule\Ekzit) {
              $resumable =
                  $this->_rules[$last->getRule()] instanceof Rule\Repetition;
          } elseif ($last instanceof Rule\Token) {
              --$this->_currentState;
          }
      } while (false === $resumable && 0 < count($this->_trace));

      if (false === $resumable) {
          return false;
      }

      $ruleName      = $last->getRule();
      $nextIndex     = $last->getData() + 1;
      $this->_depth  = $last->getDepth();
      $this->_todo   = $last->getTodo();
      $this->_todo[] = new Rule\Entry($ruleName, $nextIndex);

      return true;
  }
  /**
   * Build AST from trace.
   * Walk through the trace iteratively and recursively: each rule entry
   * opens a recursive call that ends on the matching exit.
   *
   * `$children` is used as a stack mixing three kinds of items: rule names
   * (markers of open non-transitional rules), arrays describing a node
   * (`id` and `options`), and nodes already built.
   *
   * @param   int    $i            Current trace index.
   * @param   array  &$children    Collected children.
   * @return  mixed                The first collected child when the whole
   *                               trace has been walked, the index
   *                               following an exit otherwise.
   */
  protected function _buildTree($i = 0, &$children = [])
  {
      $total = count($this->_trace);

      while ($i < $total) {
          $step = $this->_trace[$i];

          if (!($step instanceof Rule\Entry)) {
              if ($step instanceof Rule\Ekzit) {
                  return $i + 1;
              }

              // A matched token: only kept tokens become nodes.
              if (false !== $step->isKept()) {
                  $children[] = new TreeNode('token', [
                      'token'     => $step->getTokenName(),
                      'value'     => $step->getValue(),
                      'namespace' => $step->getNamespace(),
                  ]);
              }

              ++$i;

              continue;
          }

          $ruleName   = $step->getRule();
          $definition = $this->_rules[$ruleName];
          $isRule     = false === $step->isTransitional();
          $following  = $this->_trace[$i + 1];
          $id         = $definition->getNodeId();

          // Optimization: Skip empty trace sequence.
          if ($following instanceof Rule\Ekzit &&
              $ruleName == $following->getRule()) {
              $i += 2;

              continue;
          }

          if (true === $isRule) {
              $children[] = $ruleName;
          }

          if (null !== $id) {
              $children[] = [
                  'id'      => $id,
                  'options' => $definition->getNodeOptions()
              ];
          }

          $i = $this->_buildTree($i + 1, $children);

          if (false === $isRule) {
              continue;
          }

          // Unstack what has been collected since the marker of this rule:
          // the nodes (in reverse order) and the first node description.
          $collected = [];
          $nodeId    = null;
          $options   = [];

          do {
              $item = array_pop($children);

              if (true === is_object($item)) {
                  $collected[] = $item;
              } elseif (true === is_array($item) && null === $nodeId) {
                  $nodeId  = $item['id'];
                  $options = $item['options'];
              } elseif ($ruleName == $item) {
                  break;
              }
          } while (null !== $item);

          if (null === $nodeId) {
              $nodeId  = $definition->getDefaultId();
              $options = $definition->getDefaultOptions();
          }

          // No node for this rule: give the nodes back, in their original
          // order.
          if (null === $nodeId) {
              foreach (array_reverse($collected) as $node) {
                  $children[] = $node;
              }

              continue;
          }

          if (true === in_array('M', $options) &&
              true === $this->mergeTree($children, $collected, $nodeId)) {
              continue;
          }

          if (true === in_array('m', $options) &&
              true === $this->mergeTree($children, $collected, $nodeId, true)) {
              continue;
          }

          $parent = new TreeNode($id ?: $nodeId);

          // Nodes were unstacked in reverse order, hence the prepend.
          foreach ($collected as $node) {
              $node->setParent($parent);
              $parent->prependChild($node);
          }

          $children[] = $parent;
      }

      return $children[0];
  }
  /**
   * Try to merge directly children into an existing node.
   *
   * The merge only happens when the last collected child is an object
   * whose ID is strictly equal to `$cId`.
   *
   * @param   array   &$children    Current children being gathering.
   * @param   array   &$handle      Children of the new node.
   * @param   string  $cId          Node ID.
   * @param   bool    $recursive    Whether we should merge recursively or
   *                                not.
   * @return  bool                  Whether the merge has been done.
   */
  protected function mergeTree(
      &$children,
      &$handle,
      $cId,
      $recursive = false
  ) {
      $target = end($children);

      if (!is_object($target) || $cId !== $target->getId()) {
          return false;
      }

      foreach ($handle as $child) {
          if (true === $recursive) {
              $this->mergeTreeRecursive($target, $child);

              continue;
          }

          $target->appendChild($child);
          $child->setParent($target);
      }

      return true;
  }
  /**
   * Merge recursively.
   * Please, see self::mergeTree() to know the context.
   *
   * A token, or a node whose ID differs from the ID of the last child of
   * the receiver, is appended to the receiver. Otherwise, the children of
   * the node are merged, one by one, into this last child.
   *
   * @param   \Hoa\Compiler\Llk\TreeNode  $node       Node that receives.
   * @param   \Hoa\Compiler\Llk\TreeNode  $newNode    Node to merge.
   * @return  void
   */
  protected function mergeTreeRecursive(TreeNode $node, TreeNode $newNode)
  {
      $nNId = $newNode->getId();

      if ('token' !== $nNId) {
          $siblings = $node->getChildren();
          $last     = end($siblings);

          // `end` returns `false` when the receiver has no child yet:
          // there is nothing to merge into, the node is simply appended.
          if ($last instanceof TreeNode && $last->getId() === $nNId) {
              foreach ($newNode->getChildren() as $child) {
                  $this->mergeTreeRecursive($last, $child);
              }

              return;
          }
      }

      $node->appendChild($newNode);
      $newNode->setParent($node);

      return;
  }
  /**
   * Get an information about the token at the current state.
   *
   * @param   string  $kind    Token information to read (`token` by
   *                           default; this class also reads `value` and
   *                           `namespace`).
   * @return  mixed
   */
  public function getCurrentToken($kind = 'token')
  {
      $current = $this->_tokenSequence[$this->_currentState];

      return $current[$kind];
  }
  /**
   * Get the AST stored by the last parse that has built a tree.
   *
   * @return  \Hoa\Compiler\Llk\TreeNode|null    `null` when no tree has
   *                                             been stored yet.
   */
  public function getTree()
  {
      return $this->_tree;
  }
  /**
   * Get the trace of activated rules, in its current state.
   *
   * @return  array
   */
  public function getTrace()
  {
      return $this->_trace;
  }
  /**
   * Get the token definitions the parser works with.
   *
   * @return  array
   */
  public function getTokens()
  {
      return $this->_tokens;
  }
  /**
   * Get the token sequence, i.e. the lexer output for the last text handed
   * to the parser.
   *
   * @return  array
   */
  public function getTokenSequence()
  {
      return $this->_tokenSequence;
  }
  /**
   * Get rule by name.
   *
   * @param   string  $name    Rule name.
   * @return  \Hoa\Compiler\Llk\Rule|null    `null` when no rule is set
   *                                         under this name.
   */
  public function getRule($name)
  {
      return isset($this->_rules[$name])
          ? $this->_rules[$name]
          : null;
  }
  /**
   * Get all the rules, indexed by name.
   *
   * @return  array
   */
  public function getRules()
  {
      return $this->_rules;
  }
  /**
   * Get root rule.
   *
   * The root rule is the first rule, in declaration order, whose name is
   * not an integer. When every rule has an integer name, the name of the
   * last one is returned.
   *
   * @return  string|int|null    `null` when the parser has no rule.
   */
  public function getRootRule()
  {
      // Defined upfront so that an empty rule set yields `null` instead of
      // reading an undefined variable.
      $root = null;

      foreach ($this->_rules as $name => $_) {
          $root = $name;

          if (!is_int($name)) {
              break;
          }
      }

      return $root;
  }
  634. }