parser.php 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. <?php
  /**
   * Strip markup from an HTML or FB2 fragment, leaving text plus a few
   * layout markers.
   *
   * The input is scanned for "<...>" spans. Every tag is removed from the
   * output; tags listed in the substitution table are replaced by the table's
   * marker (tab, CR LF, "<P>", "<br>&nbsp;", ...). Everything between an
   * opening tag listed in the cut table and its matching closing tag is
   * dropped, with same-name nesting tracked by a counter. Each "&nbsp;" in
   * the input is turned into a plain space before scanning.
   *
   * Tag names are compared in upper case and end at the first whitespace
   * character, so attributes separated by a tab or a line break are handled
   * the same way as attributes separated by a space.
   *
   * @param string $data        Markup to process.
   * @param bool   $remove_tags When true, both tables are emptied: every tag
   *                            is simply deleted, no marker is inserted and
   *                            no element content is skipped.
   *
   * @return string The processed text. If a "<" has no closing ">", the rest
   *                of the input from the end of the last complete tag is
   *                appended unchanged (unless a cut section is still open).
   */
  function parseHtml($data, $remove_tags = FALSE) {
      $crlf = chr(13) . chr(10);
      $paragraph = $crlf . '<P>';

      // Tag name (upper case) => text emitted in place of the tag.
      $substs = array(
          // html
          'TD' => chr(9),
          'TH' => chr(9),
          'TR' => $paragraph,
          'BR' => $paragraph,
          'BR/' => $paragraph,
          'DD' => $paragraph,
          'P' => $paragraph,
          'HR' => $crlf,
          'LI' => $crlf,
          'OL' => $crlf,
          '/OL' => $crlf,
          'TABLE' => $crlf,
          '/TABLE' => $crlf,
          'TITLE' => '<br>&nbsp;',
          '/TITLE' => '<br>&nbsp;',
          'UL' => $crlf . ' ',
          '/UL' => $crlf,
          // fb2
          'EMPTY-LINE/' => '<P>&nbsp;',
          'STANZA' => '<P>&nbsp;',
          'V' => '<P>',
          '/POEM' => '<P>&nbsp;',
          'SUBTITLE' => '<br>&nbsp;<P>',
          '/SUBTITLE' => '<br>&nbsp;',
      );

      // Elements whose whole content is discarded.
      $inner_cut = array(
          'HEAD' => 1,
          'SCRIPT' => 1,
          'STYLE' => 1,
          // fb2
          'BINARY' => 1,
          'DESCRIPTION' => 1,
      );

      if ($remove_tags) {
          $substs = array();
          $inner_cut = array();
      }

      // Same character set that trim() strips by default.
      $whitespace = " \t\n\r\0\x0B";

      $data = str_replace('&nbsp;', ' ', $data);
      $len = strlen($data);
      $out = '';
      $i = 0;            // start of the text not yet copied to $out
      $cut_counter = 0;  // nesting depth inside the element being discarded
      $cut_tag = '';     // name of that element, '' when nothing is being cut

      while ($i < $len) {
          $left = strpos($data, '<', $i);
          if ($left === FALSE) {
              break;
          }
          $right = strpos($data, '>', $left + 1);
          if ($right === FALSE) {
              break;
          }

          $raw = trim(substr($data, $left + 1, $right - $left - 1));
          // The name ends at the first whitespace of any kind, not only at a
          // space; otherwise "<script\ntype=...>" would not be recognised.
          $tag = strtoupper(substr($raw, 0, strcspn($raw, $whitespace)));

          if (!$cut_counter) {
              $out .= substr($data, $i, $left - $i);

              $key = $tag;
              if (!isset($substs[$key]) && $raw != '' && substr($raw, -1) == '/') {
                  // Self-closing tag spelled differently from the table key:
                  // "<hr/>" should match 'HR', "<empty-line />" should match
                  // 'EMPTY-LINE/'. Exact matches above always win.
                  $bare = rtrim($tag, '/');
                  if (isset($substs[$bare])) {
                      $key = $bare;
                  } elseif (isset($substs[$bare . '/'])) {
                      $key = $bare . '/';
                  }
              }
              if (isset($substs[$key])) {
                  $out .= $substs[$key];
              }
          }

          // Enter a cut section, or go one level deeper when the same
          // element is nested inside itself.
          if (isset($inner_cut[$tag]) && (!$cut_counter || $cut_tag == $tag)) {
              if (!$cut_counter) {
                  $cut_tag = $tag;
              }
              $cut_counter++;
          }

          // A closing tag for the element being cut leaves one level.
          if ($tag != '' && $tag[0] == '/' && $cut_tag === (string) substr($tag, 1)) {
              if ($cut_counter > 0) {
                  $cut_counter--;
              }
              if (!$cut_counter) {
                  $cut_tag = '';
              }
          }

          $i = $right + 1;
      }

      // Trailing text after the last complete tag.
      if ($i < $len && !$cut_counter) {
          $out .= substr($data, $i);
      }
      return $out;
  }
  85. ?>