sfDomCssSelector.class.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539
  1. <?php
  2. /*
  3. * This file is part of the symfony package.
  4. * (c) 2004-2006 Fabien Potencier <fabien.potencier@symfony-project.com>
  5. *
  6. * For the full copyright and license information, please view the LICENSE
  7. * file that was distributed with this source code.
  8. */
  9. /**
  10. * sfDomCssSelector allows navigating a DOM document with CSS selectors.
  11. *
  12. * Based on getElementsBySelector version 0.4 - Simon Willison, March 25th 2003
  13. * http://simon.incutio.com/archive/2003/03/25/getElementsBySelector
  14. *
  15. * Some methods based on the jquery library
  16. *
  17. * @package symfony
  18. * @subpackage util
  19. * @author Fabien Potencier <fabien.potencier@symfony-project.com>
  20. * @version SVN: $Id: sfDomCssSelector.class.php 10947 2008-08-19 14:11:06Z fabien $
  21. */
  22. class sfDomCssSelector
  23. {
  24. public $nodes = array();
  25. public function __construct($nodes)
  26. {
  27. if (!is_array($nodes))
  28. {
  29. $nodes = array($nodes);
  30. }
  31. $this->nodes = $nodes;
  32. }
  33. public function getNodes()
  34. {
  35. return $this->nodes;
  36. }
  37. public function getNode()
  38. {
  39. return $this->nodes ? $this->nodes[0] : null;
  40. }
  41. public function getValue()
  42. {
  43. return $this->nodes[0]->nodeValue;
  44. }
  45. public function getValues()
  46. {
  47. $values = array();
  48. foreach ($this->nodes as $node)
  49. {
  50. $values[] = $node->nodeValue;
  51. }
  52. return $values;
  53. }
  54. public function matchSingle($selector)
  55. {
  56. $nodes = $this->getElements($selector);
  57. return $nodes ? new sfDomCssSelector($nodes[0]) : new sfDomCssSelector(array());
  58. }
  59. public function matchAll($selector)
  60. {
  61. $nodes = $this->getElements($selector);
  62. return $nodes ? new sfDomCssSelector($nodes) : new sfDomCssSelector(array());
  63. }
  64. /* DEPRECATED */
  65. public function getTexts($selector)
  66. {
  67. $texts = array();
  68. foreach ($this->getElements($selector) as $element)
  69. {
  70. $texts[] = $element->nodeValue;
  71. }
  72. return $texts;
  73. }
  74. /* DEPRECATED */
  75. public function getElements($selector)
  76. {
  77. $nodes = array();
  78. foreach ($this->nodes as $node)
  79. {
  80. $result_nodes = $this->getElementsForNode($selector, $node);
  81. if ($result_nodes)
  82. {
  83. $nodes = array_merge($nodes, $result_nodes);
  84. }
  85. }
  86. foreach ($nodes as $node)
  87. {
  88. $node->removeAttribute('sf_matched');
  89. }
  90. return $nodes;
  91. }
  92. protected function getElementsForNode($selector, $root_node)
  93. {
  94. $all_nodes = array();
  95. foreach ($this->tokenize_selectors($selector) as $selector)
  96. {
  97. $nodes = array($root_node);
  98. foreach ($this->tokenize($selector) as $token)
  99. {
  100. $combinator = $token['combinator'];
  101. $selector = $token['selector'];
  102. $token = trim($token['name']);
  103. $pos = strpos($token, '#');
  104. if (false !== $pos && preg_match('/^[A-Za-z0-9]*$/', substr($token, 0, $pos)))
  105. {
  106. // Token is an ID selector
  107. $tagName = substr($token, 0, $pos);
  108. $id = substr($token, $pos + 1);
  109. $xpath = new DomXPath($root_node);
  110. $element = $xpath->query(sprintf("//*[@id = '%s']", $id))->item(0);
  111. if (!$element || ($tagName && strtolower($element->nodeName) != $tagName))
  112. {
  113. // tag with that ID not found
  114. return array();
  115. }
  116. // Set nodes to contain just this element
  117. $nodes = array($element);
  118. $nodes = $this->matchCustomSelector($nodes, $selector);
  119. continue; // Skip to next token
  120. }
  121. $pos = strpos($token, '.');
  122. if (false !== $pos && preg_match('/^[A-Za-z0-9\*]*$/', substr($token, 0, $pos)))
  123. {
  124. // Token contains a class selector
  125. $tagName = substr($token, 0, $pos);
  126. if (!$tagName)
  127. {
  128. $tagName = '*';
  129. }
  130. $className = substr($token, $pos + 1);
  131. // Get elements matching tag, filter them for class selector
  132. $founds = $this->getElementsByTagName($nodes, $tagName, $combinator);
  133. $nodes = array();
  134. foreach ($founds as $found)
  135. {
  136. if (preg_match('/\b'.$className.'\b/', $found->getAttribute('class')))
  137. {
  138. $nodes[] = $found;
  139. }
  140. }
  141. $nodes = $this->matchCustomSelector($nodes, $selector);
  142. continue; // Skip to next token
  143. }
  144. // Code to deal with attribute selectors
  145. if (preg_match('/^(\w+|\*)(\[.+\])$/', $token, $matches))
  146. {
  147. $tagName = $matches[1] ? $matches[1] : '*';
  148. preg_match_all('/
  149. \[
  150. ([\w\-]+) # attribute
  151. ([=~\|\^\$\*]?) # modifier (optional)
  152. =? # equal (optional)
  153. (
  154. "([^"]*)" # quoted value (optional)
  155. |
  156. ([^\]]*) # non quoted value (optional)
  157. )
  158. \]
  159. /x', $matches[2], $matches, PREG_SET_ORDER);
  160. // Grab all of the tagName elements within current node
  161. $founds = $this->getElementsByTagName($nodes, $tagName, $combinator);
  162. $nodes = array();
  163. foreach ($founds as $found)
  164. {
  165. $ok = false;
  166. foreach ($matches as $match)
  167. {
  168. $attrName = $match[1];
  169. $attrOperator = $match[2];
  170. $attrValue = $match[4];
  171. switch ($attrOperator)
  172. {
  173. case '=': // Equality
  174. $ok = $found->getAttribute($attrName) == $attrValue;
  175. break;
  176. case '~': // Match one of space seperated words
  177. $ok = preg_match('/\b'.preg_quote($attrValue, '/').'\b/', $found->getAttribute($attrName));
  178. break;
  179. case '|': // Match start with value followed by optional hyphen
  180. $ok = preg_match('/^'.preg_quote($attrValue, '/').'-?/', $found->getAttribute($attrName));
  181. break;
  182. case '^': // Match starts with value
  183. $ok = 0 === strpos($found->getAttribute($attrName), $attrValue);
  184. break;
  185. case '$': // Match ends with value
  186. $ok = $attrValue == substr($found->getAttribute($attrName), -strlen($attrValue));
  187. break;
  188. case '*': // Match ends with value
  189. $ok = false !== strpos($found->getAttribute($attrName), $attrValue);
  190. break;
  191. default :
  192. // Just test for existence of attribute
  193. $ok = $found->hasAttribute($attrName);
  194. }
  195. if (false == $ok)
  196. {
  197. break;
  198. }
  199. }
  200. if ($ok)
  201. {
  202. $nodes[] = $found;
  203. }
  204. }
  205. continue; // Skip to next token
  206. }
  207. // If we get here, token is JUST an element (not a class or ID selector)
  208. $nodes = $this->getElementsByTagName($nodes, $token, $combinator);
  209. $nodes = $this->matchCustomSelector($nodes, $selector);
  210. }
  211. foreach ($nodes as $node)
  212. {
  213. if (!$node->getAttribute('sf_matched'))
  214. {
  215. $node->setAttribute('sf_matched', true);
  216. $all_nodes[] = $node;
  217. }
  218. }
  219. }
  220. return $all_nodes;
  221. }
  222. protected function getElementsByTagName($nodes, $tagName, $combinator = ' ')
  223. {
  224. $founds = array();
  225. foreach ($nodes as $node)
  226. {
  227. switch ($combinator)
  228. {
  229. case ' ':
  230. // Descendant selector
  231. foreach ($node->getElementsByTagName($tagName) as $element)
  232. {
  233. $founds[] = $element;
  234. }
  235. break;
  236. case '>':
  237. // Child selector
  238. foreach ($node->childNodes as $element)
  239. {
  240. if ($tagName == $element->nodeName)
  241. {
  242. $founds[] = $element;
  243. }
  244. }
  245. break;
  246. case '+':
  247. // Adjacent selector
  248. $element = $node->nextSibling;
  249. if ($element && '#text' == $element->nodeName)
  250. {
  251. $element = $element->nextSibling;
  252. }
  253. if ($element && $tagName == $element->nodeName)
  254. {
  255. $founds[] = $element;
  256. }
  257. break;
  258. default:
  259. throw new Exception(sprintf('Unrecognized combinator "%s".', $combinator));
  260. }
  261. }
  262. return $founds;
  263. }
  264. protected function tokenize_selectors($selector)
  265. {
  266. // split tokens by , except in an attribute selector
  267. $tokens = array();
  268. $quoted = false;
  269. $token = '';
  270. for ($i = 0, $max = strlen($selector); $i < $max; $i++)
  271. {
  272. if (',' == $selector[$i] && !$quoted)
  273. {
  274. $tokens[] = trim($token);
  275. $token = '';
  276. }
  277. else if ('"' == $selector[$i])
  278. {
  279. $token .= $selector[$i];
  280. $quoted = $quoted ? false : true;
  281. }
  282. else
  283. {
  284. $token .= $selector[$i];
  285. }
  286. }
  287. if ($token)
  288. {
  289. $tokens[] = trim($token);
  290. }
  291. return $tokens;
  292. }
  293. protected function tokenize($selector)
  294. {
  295. // split tokens by space except if space is in an attribute selector
  296. $tokens = array();
  297. $combinators = array(' ', '>', '+');
  298. $quoted = false;
  299. $token = array('combinator' => ' ', 'name' => '');
  300. for ($i = 0, $max = strlen($selector); $i < $max; $i++)
  301. {
  302. if (in_array($selector[$i], $combinators) && !$quoted)
  303. {
  304. // remove all whitespaces around the combinator
  305. $combinator = $selector[$i];
  306. while (in_array($selector[$i + 1], $combinators))
  307. {
  308. if (' ' != $selector[++$i])
  309. {
  310. $combinator = $selector[$i];
  311. }
  312. }
  313. $tokens[] = $token;
  314. $token = array('combinator' => $combinator, 'name' => '');
  315. }
  316. else if ('"' == $selector[$i])
  317. {
  318. $token['name'] .= $selector[$i];
  319. $quoted = $quoted ? false : true;
  320. }
  321. else
  322. {
  323. $token['name'] .= $selector[$i];
  324. }
  325. }
  326. if ($token['name'])
  327. {
  328. $tokens[] = $token;
  329. }
  330. foreach ($tokens as &$token)
  331. {
  332. list($token['name'], $token['selector']) = $this->tokenize_selector_name($token['name']);
  333. }
  334. return $tokens;
  335. }
  336. protected function tokenize_selector_name($token_name)
  337. {
  338. // split custom selector
  339. $quoted = false;
  340. $name = '';
  341. $selector = '';
  342. $in_selector = false;
  343. for ($i = 0, $max = strlen($token_name); $i < $max; $i++)
  344. {
  345. if ('"' == $token_name[$i])
  346. {
  347. $quoted = $quoted ? false : true;
  348. }
  349. if (!$quoted && ':' == $token_name[$i])
  350. {
  351. $in_selector = true;
  352. }
  353. if ($in_selector)
  354. {
  355. $selector .= $token_name[$i];
  356. }
  357. else
  358. {
  359. $name .= $token_name[$i];
  360. }
  361. }
  362. return array($name, $selector);
  363. }
  364. protected function matchCustomSelector($nodes, $selector)
  365. {
  366. if (!$selector)
  367. {
  368. return $nodes;
  369. }
  370. $selector = $this->tokenize_custom_selector($selector);
  371. $matchingNodes = array();
  372. for ($i = 0, $max = count($nodes); $i < $max; $i++)
  373. {
  374. switch ($selector['selector'])
  375. {
  376. case 'contains':
  377. if (false !== strpos($nodes[$i]->textContent, $selector['parameter']))
  378. {
  379. $matchingNodes[] = $nodes[$i];
  380. }
  381. break;
  382. case 'nth-child':
  383. if ($nodes[$i] === $this->nth($nodes[$i]->parentNode->firstChild, (integer) $selector['parameter']))
  384. {
  385. $matchingNodes[] = $nodes[$i];
  386. }
  387. break;
  388. case 'first-child':
  389. if ($nodes[$i] === $this->nth($nodes[$i]->parentNode->firstChild))
  390. {
  391. $matchingNodes[] = $nodes[$i];
  392. }
  393. break;
  394. case 'last-child':
  395. if ($nodes[$i] === $this->nth($nodes[$i]->parentNode->lastChild, 1, 'previousSibling'))
  396. {
  397. $matchingNodes[] = $nodes[$i];
  398. }
  399. break;
  400. case 'lt':
  401. if ($i < (integer) $selector['parameter'])
  402. {
  403. $matchingNodes[] = $nodes[$i];
  404. }
  405. break;
  406. case 'gt':
  407. if ($i > (integer) $selector['parameter'])
  408. {
  409. $matchingNodes[] = $nodes[$i];
  410. }
  411. break;
  412. case 'odd':
  413. if ($i % 2)
  414. {
  415. $matchingNodes[] = $nodes[$i];
  416. }
  417. break;
  418. case 'even':
  419. if (0 == $i % 2)
  420. {
  421. $matchingNodes[] = $nodes[$i];
  422. }
  423. break;
  424. case 'nth':
  425. case 'eq':
  426. if ($i == (integer) $selector['parameter'])
  427. {
  428. $matchingNodes[] = $nodes[$i];
  429. }
  430. break;
  431. case 'first':
  432. if ($i == 0)
  433. {
  434. $matchingNodes[] = $nodes[$i];
  435. }
  436. break;
  437. case 'last':
  438. if ($i == $max - 1)
  439. {
  440. $matchingNodes[] = $nodes[$i];
  441. }
  442. break;
  443. default:
  444. throw new Exception(sprintf('Unrecognized selector "%s".', $selector['selector']));
  445. }
  446. }
  447. return $matchingNodes;
  448. }
  449. protected function tokenize_custom_selector($selector)
  450. {
  451. if (!preg_match('/
  452. ([a-zA-Z0-9\-]+)
  453. (?:
  454. \(
  455. (?:
  456. ("|\')(.*)?\2
  457. |
  458. (.*?)
  459. )
  460. \)
  461. )?
  462. /x', substr($selector, 1), $matches))
  463. {
  464. throw new Exception(sprintf('Unable to parse custom selector "%s".', $selector));
  465. }
  466. return array('selector' => $matches[1], 'parameter' => isset($matches[3]) ? ($matches[3] ? $matches[3] : $matches[4]) : '');
  467. }
  468. protected function nth($cur, $result = 1, $dir = 'nextSibling')
  469. {
  470. $num = 0;
  471. for (; $cur; $cur = $cur->$dir)
  472. {
  473. if (1 == $cur->nodeType)
  474. {
  475. ++$num;
  476. }
  477. if ($num == $result)
  478. {
  479. return $cur;
  480. }
  481. }
  482. }
  483. }