fb2xml2.cpp 18 KB


  1. #include "fb2xml2.h"
  2. #ifdef FB2_USE_LIBXML2
  3. #include <cstring>
  4. #include <libxml/tree.h>
  5. #include <libxml/parser.h>
  6. #include <libxml/HTMLparser.h>
  7. #include <libxml/xmlreader.h>
  8. #include <QtDebug>
  9. namespace XML2 {
  10. //---------------------------------------------------------------------------
  11. // XML2::HtmlReader
  12. //---------------------------------------------------------------------------
  13. class HtmlReaderLocator : public QXmlLocator {
  14. public:
  15. HtmlReaderLocator(HtmlReader* r) : reader(r) {}
  16. virtual int columnNumber(void) const;
  17. virtual int lineNumber(void) const;
  18. private:
  19. HtmlReader* reader;
  20. };
  21. class HtmlReaderPrivate
  22. {
  23. private:
  24. class ClosedTag : public QList<QString> { public: ClosedTag(); };
  25. public:
  26. ~HtmlReaderPrivate(void) {}
  27. private:
  28. HtmlReaderPrivate(HtmlReader* reader);
  29. static void startDocument(void* c);
  30. static void endDocument(void* c);
  31. static void startElement(void* c, const xmlChar* name, const xmlChar** attrs);
  32. static void endElement(void* c, const xmlChar* name);
  33. static void comment(void* c, const xmlChar* value);
  34. static void cdataBlock(void* c, const xmlChar* value, int len);
  35. static void processingInstruction(void* c, const xmlChar* target, const xmlChar* data);
  36. static void characters(void* c, const xmlChar* ch, int len);
  37. static void ignorableWhitespace(void* c, const xmlChar* ch, int len);
  38. static void internalSubset(void* c, const xmlChar* name, const xmlChar* publicId, const xmlChar* systemId);
  39. static QString C2S(const xmlChar* text, int size = -1);
  40. static QString local(const QString &name);
  41. void parse(const QXmlInputSource* input);
  42. QScopedPointer<HtmlReaderLocator> locator;
  43. Q_DECLARE_PUBLIC(HtmlReader)
  44. HtmlReader* q_ptr;
  45. QXmlEntityResolver* entityresolver;
  46. QXmlDTDHandler* dtdhandler;
  47. QXmlContentHandler* contenthandler;
  48. QXmlErrorHandler* errorhandler;
  49. QXmlLexicalHandler* lexicalhandler;
  50. QXmlDeclHandler* declhandler;
  51. xmlParserCtxt* context;
  52. QList<QString> closed;
  53. friend class HtmlReaderLocator;
  54. };
  55. HtmlReaderPrivate::HtmlReaderPrivate(HtmlReader* reader)
  56. : q_ptr(reader), entityresolver(0), dtdhandler(0), contenthandler(0), errorhandler(0), lexicalhandler(0), declhandler(0), context(0)
  57. {
  58. this->locator.reset(new HtmlReaderLocator(reader));
  59. }
  60. HtmlReaderPrivate::ClosedTag::ClosedTag()
  61. {
  62. *this << "area";
  63. *this << "base";
  64. *this << "br";
  65. *this << "col";
  66. *this << "command";
  67. *this << "embed";
  68. *this << "hr";
  69. *this << "img";
  70. *this << "input";
  71. *this << "keygen";
  72. *this << "link";
  73. *this << "meta";
  74. *this << "param";
  75. *this << "source";
  76. *this << "track";
  77. *this << "wbr";
  78. }
  79. QString HtmlReaderPrivate::C2S(const xmlChar* text, int size)
  80. {
  81. return QString::fromLocal8Bit(reinterpret_cast<const char*>(text), size);
  82. }
  83. void HtmlReaderPrivate::parse(const QXmlInputSource* input)
  84. {
  85. htmlSAXHandler handler;
  86. QByteArray arr = input->data().toUtf8();
  87. std::memset(&handler, 0, sizeof(handler));
  88. handler.startDocument = &HtmlReaderPrivate::startDocument;
  89. handler.endDocument = &HtmlReaderPrivate::endDocument;
  90. handler.startElement = &HtmlReaderPrivate::startElement;
  91. handler.endElement = &HtmlReaderPrivate::endElement;
  92. handler.comment = &HtmlReaderPrivate::comment;
  93. handler.cdataBlock = &HtmlReaderPrivate::cdataBlock;
  94. handler.processingInstruction = &HtmlReaderPrivate::processingInstruction;
  95. handler.characters = &HtmlReaderPrivate::characters;
  96. handler.ignorableWhitespace = &HtmlReaderPrivate::ignorableWhitespace;
  97. handler.internalSubset = &HtmlReaderPrivate::internalSubset;
  98. this->context = htmlCreatePushParserCtxt(&handler, this, arr.constData(), arr.size(), "", XML_CHAR_ENCODING_UTF8);
  99. htmlParseChunk(this->context, NULL, 0, 1);
  100. htmlFreeParserCtxt(this->context);
  101. xmlCleanupParser();
  102. }
  103. void HtmlReaderPrivate::startDocument(void* c)
  104. {
  105. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  106. if (r->contenthandler) {
  107. r->contenthandler->startDocument();
  108. }
  109. }
  110. void HtmlReaderPrivate::endDocument(void* c)
  111. {
  112. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  113. if (r->contenthandler) {
  114. r->contenthandler->endDocument();
  115. }
  116. }
  117. QString HtmlReaderPrivate::local(const QString &name)
  118. {
  119. return name.mid(name.lastIndexOf(":"));
  120. }
  121. void HtmlReaderPrivate::startElement(void* c, const xmlChar* name, const xmlChar** attrs)
  122. {
  123. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  124. if (r->contenthandler) {
  125. QXmlAttributes a;
  126. if (attrs) {
  127. int i = 0;
  128. while (attrs[i]) {
  129. QString qName = C2S(attrs[i]);
  130. a.append(qName, "", local(qName), C2S(attrs[i+1]));
  131. i += 2;
  132. }
  133. }
  134. static ClosedTag closed;
  135. QString qName = C2S(name);
  136. QString localName = local(qName);
  137. r->contenthandler->startElement("", localName, qName, a);
  138. if (closed.indexOf(qName.toLower()) != -1) {
  139. r->contenthandler->endElement("", localName, qName);
  140. }
  141. }
  142. }
  143. void HtmlReaderPrivate::endElement(void* c, const xmlChar* name)
  144. {
  145. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  146. if (r->contenthandler) {
  147. QString qName = C2S(name);
  148. r->contenthandler->endElement("", local(qName), qName);
  149. }
  150. }
  151. void HtmlReaderPrivate::comment(void* c, const xmlChar* value)
  152. {
  153. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  154. if (r->lexicalhandler) {
  155. r->lexicalhandler->comment(C2S(value));
  156. }
  157. }
  158. void HtmlReaderPrivate::cdataBlock(void* c, const xmlChar* value, int len)
  159. {
  160. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  161. if (r->lexicalhandler) {
  162. r->lexicalhandler->startCDATA();
  163. if (r->contenthandler) {
  164. r->contenthandler->characters(C2S(value, len));
  165. }
  166. r->lexicalhandler->endCDATA();
  167. }
  168. }
  169. void HtmlReaderPrivate::processingInstruction(void* c, const xmlChar* target, const xmlChar* data)
  170. {
  171. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  172. if (r->contenthandler) {
  173. r->contenthandler->processingInstruction(C2S(target), C2S(data));
  174. }
  175. }
  176. void HtmlReaderPrivate::characters(void* c, const xmlChar* ch, int len)
  177. {
  178. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  179. if (r->contenthandler) {
  180. r->contenthandler->characters(C2S(ch, len));
  181. }
  182. }
  183. void HtmlReaderPrivate::ignorableWhitespace(void* c, const xmlChar* ch, int len)
  184. {
  185. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  186. if (r->contenthandler) {
  187. r->contenthandler->ignorableWhitespace(C2S(ch, len));
  188. }
  189. }
  190. void HtmlReaderPrivate::internalSubset(void* c, const xmlChar* name, const xmlChar* publicId, const xmlChar* systemId)
  191. {
  192. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  193. if (r->lexicalhandler) {
  194. r->lexicalhandler->startDTD(C2S(name), C2S(publicId), C2S(systemId));
  195. r->lexicalhandler->endDTD();
  196. }
  197. }
  198. HtmlReader::HtmlReader(void)
  199. : d_ptr(new HtmlReaderPrivate(this))
  200. {
  201. }
  202. HtmlReader::~HtmlReader(void)
  203. {
  204. }
  205. bool HtmlReader::feature(const QString&, bool* ok) const
  206. {
  207. if (ok) {
  208. *ok = false;
  209. }
  210. return false;
  211. }
  212. void HtmlReader::setFeature(const QString&, bool)
  213. {
  214. }
  215. bool HtmlReader::hasFeature(const QString&) const
  216. {
  217. return false;
  218. }
  219. void* HtmlReader::property(const QString&, bool* ok) const
  220. {
  221. if (ok) {
  222. *ok = false;
  223. }
  224. return 0;
  225. }
  226. void HtmlReader::setProperty(const QString&, void*)
  227. {
  228. }
  229. bool HtmlReader::hasProperty(const QString&) const
  230. {
  231. return false;
  232. }
  233. void HtmlReader::setEntityResolver(QXmlEntityResolver* handler)
  234. {
  235. Q_D(HtmlReader);
  236. d->entityresolver = handler;
  237. }
  238. QXmlEntityResolver* HtmlReader::entityResolver(void) const
  239. {
  240. const HtmlReaderPrivate* d = this->d_func();
  241. return d->entityresolver;
  242. }
  243. void HtmlReader::setDTDHandler(QXmlDTDHandler* handler)
  244. {
  245. Q_D(HtmlReader);
  246. d->dtdhandler = handler;
  247. }
  248. QXmlDTDHandler* HtmlReader::DTDHandler(void) const
  249. {
  250. const HtmlReaderPrivate* d = this->d_func();
  251. return d->dtdhandler;
  252. }
  253. void HtmlReader::setContentHandler(QXmlContentHandler* handler)
  254. {
  255. Q_D(HtmlReader);
  256. d->contenthandler = handler;
  257. }
  258. QXmlContentHandler* HtmlReader::contentHandler(void) const
  259. {
  260. const HtmlReaderPrivate* d = this->d_func();
  261. return d->contenthandler;
  262. }
  263. void HtmlReader::setErrorHandler(QXmlErrorHandler* handler)
  264. {
  265. Q_D(HtmlReader);
  266. d->errorhandler = handler;
  267. }
  268. QXmlErrorHandler* HtmlReader::errorHandler(void) const
  269. {
  270. const HtmlReaderPrivate* d = this->d_func();
  271. return d->errorhandler;
  272. }
  273. void HtmlReader::setLexicalHandler(QXmlLexicalHandler* handler)
  274. {
  275. Q_D(HtmlReader);
  276. d->lexicalhandler = handler;
  277. }
  278. QXmlLexicalHandler* HtmlReader::lexicalHandler(void) const
  279. {
  280. const HtmlReaderPrivate* d = this->d_func();
  281. return d->lexicalhandler;
  282. }
  283. void HtmlReader::setDeclHandler(QXmlDeclHandler* handler)
  284. {
  285. Q_D(HtmlReader);
  286. d->declhandler = handler;
  287. }
  288. QXmlDeclHandler* HtmlReader::declHandler(void) const
  289. {
  290. const HtmlReaderPrivate* d = this->d_func();
  291. return d->declhandler;
  292. }
  293. bool HtmlReader::parse(const QXmlInputSource& input)
  294. {
  295. return this->parse(&input);
  296. }
  297. bool HtmlReader::parse(const QXmlInputSource* input)
  298. {
  299. Q_D(HtmlReader);
  300. if (d->contenthandler) {
  301. d->contenthandler->setDocumentLocator(d->locator.data());
  302. }
  303. d->parse(input);
  304. return true;
  305. }
  306. int HtmlReaderLocator::columnNumber(void) const
  307. {
  308. return this->reader->d_func()->context->input->col;
  309. }
  310. int HtmlReaderLocator::lineNumber(void) const
  311. {
  312. return this->reader->d_func()->context->input->line;
  313. }
  314. //---------------------------------------------------------------------------
// XML2::XmlReader
  316. //---------------------------------------------------------------------------
  317. class XmlReaderLocator : public QXmlLocator {
  318. public:
  319. XmlReaderLocator(XmlReader* r) : reader(r) {}
  320. virtual int columnNumber(void) const;
  321. virtual int lineNumber(void) const;
  322. private:
  323. XmlReader* reader;
  324. };
  325. class XmlReaderPrivate {
  326. public:
  327. ~XmlReaderPrivate(void) {}
  328. private:
  329. XmlReaderPrivate(XmlReader* reader);
  330. static void onError(void *arg, const char *msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator);
  331. static int onRead(void * context, char * buffer, int len);
  332. static QString C2S(const xmlChar* text, int size = -1);
  333. bool parse(const QXmlInputSource* input);
  334. bool parse(QIODevice& input);
  335. void process(xmlTextReaderPtr reader);
  336. QScopedPointer<XmlReaderLocator> locator;
  337. Q_DECLARE_PUBLIC(XmlReader)
  338. XmlReader* q_ptr;
  339. QXmlEntityResolver* entityresolver;
  340. QXmlDTDHandler* dtdhandler;
  341. QXmlContentHandler* contenthandler;
  342. QXmlErrorHandler* errorhandler;
  343. QXmlLexicalHandler* lexicalhandler;
  344. QXmlDeclHandler* declhandler;
  345. xmlTextReaderPtr m_reader;
  346. friend class XmlReaderLocator;
  347. };
  348. XmlReaderPrivate::XmlReaderPrivate(XmlReader* reader)
  349. : q_ptr(reader), entityresolver(0), dtdhandler(0), contenthandler(0), errorhandler(0), lexicalhandler(0), declhandler(0), m_reader(0)
  350. {
  351. this->locator.reset(new XmlReaderLocator(reader));
  352. }
  353. QString XmlReaderPrivate::C2S(const xmlChar* text, int size)
  354. {
  355. return QString::fromLocal8Bit(reinterpret_cast<const char*>(text), size);
  356. }
  357. void XmlReaderPrivate::onError(void * arg, const char * msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator)
  358. {
  359. XmlReaderPrivate* r = reinterpret_cast<XmlReaderPrivate*>(arg);
  360. if (r->errorhandler) {
  361. QXmlParseException e(QString::fromLocal8Bit(msg), xmlTextReaderGetParserColumnNumber(r->m_reader), xmlTextReaderGetParserLineNumber(r->m_reader));
  362. switch (severity) {
  363. case XML_PARSER_SEVERITY_VALIDITY_WARNING: r->errorhandler->warning(e); break;
  364. case XML_PARSER_SEVERITY_VALIDITY_ERROR: r->errorhandler->error(e); break;
  365. case XML_PARSER_SEVERITY_WARNING: r->errorhandler->warning(e); break;
  366. case XML_PARSER_SEVERITY_ERROR: r->errorhandler->error(e); break;
  367. }
  368. }
  369. }
  370. void XmlReaderPrivate::process(xmlTextReaderPtr reader)
  371. {
  372. if (!contenthandler) return;
  373. switch (xmlTextReaderNodeType(reader)) {
  374. case XML_READER_TYPE_ELEMENT: {
  375. QString localName = C2S(xmlTextReaderConstLocalName(reader));
  376. QString qName = C2S(xmlTextReaderConstName(reader));
  377. bool empty = xmlTextReaderIsEmptyElement(reader);
  378. QXmlAttributes atts;
  379. while (xmlTextReaderMoveToNextAttribute(reader)) {
  380. QString localName = C2S(xmlTextReaderConstLocalName(reader));
  381. QString qName = C2S(xmlTextReaderConstName(reader));
  382. QString value = C2S(xmlTextReaderConstValue(reader));
  383. atts.append(qName, "", localName, value);
  384. }
  385. contenthandler->startElement("", localName, qName, atts);
  386. if (empty) contenthandler->endElement("", localName, qName);
  387. } break;
  388. case XML_READER_TYPE_TEXT: {
  389. QString value = C2S(xmlTextReaderConstValue(reader));
  390. contenthandler->characters(value);
  391. } break;
  392. case XML_READER_TYPE_END_ELEMENT: {
  393. QString localName = C2S(xmlTextReaderConstLocalName(reader));
  394. QString qName = C2S(xmlTextReaderConstName(reader));
  395. contenthandler->endElement("", localName, qName);
  396. } break;
  397. case XML_READER_TYPE_COMMENT: {
  398. if (lexicalhandler) {
  399. QString value = C2S(xmlTextReaderConstValue(reader));
  400. lexicalhandler->comment(value);
  401. }
  402. } break;
  403. }
  404. }
  405. int XmlReaderPrivate::onRead(void * context, char * buffer, int len)
  406. {
  407. QIODevice *device = reinterpret_cast<QIODevice*>(context);
  408. return device->read(buffer, len);
  409. }
  410. bool XmlReaderPrivate::parse(const QXmlInputSource* input)
  411. {
  412. QByteArray arr = input->data().toUtf8();
  413. int options = XML_PARSE_RECOVER | XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET;
  414. m_reader = xmlReaderForMemory(arr.constData(), arr.size(), NULL, NULL, options);
  415. if (!m_reader) return false;
  416. xmlTextReaderSetErrorHandler(m_reader, &XmlReaderPrivate::onError, this);
  417. while (xmlTextReaderRead(m_reader) == 1) process(m_reader);
  418. xmlFreeTextReader(m_reader);
  419. return true;
  420. }
  421. bool XmlReaderPrivate::parse(QIODevice& input)
  422. {
  423. int options = XML_PARSE_RECOVER | XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET;
  424. m_reader = xmlReaderForIO(&XmlReaderPrivate::onRead, NULL, &input, NULL, NULL, options);
  425. if (!m_reader) return false;
  426. xmlTextReaderSetErrorHandler(m_reader, &XmlReaderPrivate::onError, this);
  427. while (xmlTextReaderRead(m_reader) == 1) process(m_reader);
  428. xmlFreeTextReader(m_reader);
  429. return true;
  430. }
  431. XmlReader::XmlReader(void)
  432. : d_ptr(new XmlReaderPrivate(this))
  433. {
  434. }
  435. XmlReader::~XmlReader(void)
  436. {
  437. }
  438. bool XmlReader::feature(const QString&, bool* ok) const
  439. {
  440. if (ok) *ok = false;
  441. return false;
  442. }
  443. void XmlReader::setFeature(const QString&, bool)
  444. {
  445. }
  446. bool XmlReader::hasFeature(const QString&) const
  447. {
  448. return false;
  449. }
  450. void* XmlReader::property(const QString&, bool* ok) const
  451. {
  452. if (ok) *ok = false;
  453. return 0;
  454. }
  455. void XmlReader::setProperty(const QString&, void*)
  456. {
  457. }
  458. bool XmlReader::hasProperty(const QString&) const
  459. {
  460. return false;
  461. }
  462. void XmlReader::setEntityResolver(QXmlEntityResolver* handler)
  463. {
  464. Q_D(XmlReader);
  465. d->entityresolver = handler;
  466. }
  467. QXmlEntityResolver* XmlReader::entityResolver(void) const
  468. {
  469. const XmlReaderPrivate* d = this->d_func();
  470. return d->entityresolver;
  471. }
  472. void XmlReader::setDTDHandler(QXmlDTDHandler* handler)
  473. {
  474. Q_D(XmlReader);
  475. d->dtdhandler = handler;
  476. }
  477. QXmlDTDHandler* XmlReader::DTDHandler(void) const
  478. {
  479. const XmlReaderPrivate* d = this->d_func();
  480. return d->dtdhandler;
  481. }
  482. void XmlReader::setContentHandler(QXmlContentHandler* handler)
  483. {
  484. Q_D(XmlReader);
  485. d->contenthandler = handler;
  486. }
  487. QXmlContentHandler* XmlReader::contentHandler(void) const
  488. {
  489. const XmlReaderPrivate* d = this->d_func();
  490. return d->contenthandler;
  491. }
  492. void XmlReader::setErrorHandler(QXmlErrorHandler* handler)
  493. {
  494. Q_D(XmlReader);
  495. d->errorhandler = handler;
  496. }
  497. QXmlErrorHandler* XmlReader::errorHandler(void) const
  498. {
  499. const XmlReaderPrivate* d = this->d_func();
  500. return d->errorhandler;
  501. }
  502. void XmlReader::setLexicalHandler(QXmlLexicalHandler* handler)
  503. {
  504. Q_D(XmlReader);
  505. d->lexicalhandler = handler;
  506. }
  507. QXmlLexicalHandler* XmlReader::lexicalHandler(void) const
  508. {
  509. const XmlReaderPrivate* d = this->d_func();
  510. return d->lexicalhandler;
  511. }
  512. void XmlReader::setDeclHandler(QXmlDeclHandler* handler)
  513. {
  514. Q_D(XmlReader);
  515. d->declhandler = handler;
  516. }
  517. QXmlDeclHandler* XmlReader::declHandler(void) const
  518. {
  519. const XmlReaderPrivate* d = this->d_func();
  520. return d->declhandler;
  521. }
  522. bool XmlReader::parse(const QXmlInputSource& input)
  523. {
  524. return this->parse(&input);
  525. }
  526. bool XmlReader::parse(const QXmlInputSource* input)
  527. {
  528. Q_D(XmlReader);
  529. if (d->contenthandler) {
  530. d->contenthandler->setDocumentLocator(d->locator.data());
  531. }
  532. d->parse(input);
  533. return true;
  534. }
  535. bool XmlReader::parse(QIODevice& input)
  536. {
  537. Q_D(XmlReader);
  538. if (d->contenthandler) {
  539. d->contenthandler->setDocumentLocator(d->locator.data());
  540. }
  541. d->parse(input);
  542. return true;
  543. }
  544. int XmlReaderLocator::columnNumber(void) const
  545. {
  546. return xmlTextReaderGetParserColumnNumber(this->reader->d_func()->m_reader);
  547. }
  548. int XmlReaderLocator::lineNumber(void) const
  549. {
  550. return xmlTextReaderGetParserLineNumber(this->reader->d_func()->m_reader);
  551. }
  552. } // namespace XML2
  553. #endif // FB2_USE_LIBXML2