fb2xml2.cpp 17 KB


  1. #include "fb2xml2.h"
  2. #include <cstring>
  3. #include <libxml/tree.h>
  4. #include <libxml/parser.h>
  5. #include <libxml/HTMLparser.h>
  6. #include <libxml/xmlreader.h>
  7. #include <QtDebug>
  8. namespace XML2 {
  9. //---------------------------------------------------------------------------
  10. // XML2::HtmlReader
  11. //---------------------------------------------------------------------------
  12. class HtmlReaderLocator : public QXmlLocator {
  13. public:
  14. HtmlReaderLocator(HtmlReader* r) : reader(r) {}
  15. virtual int columnNumber(void) const;
  16. virtual int lineNumber(void) const;
  17. private:
  18. HtmlReader* reader;
  19. };
  20. class HtmlReaderPrivate {
  21. public:
  22. ~HtmlReaderPrivate(void) {}
  23. private:
  24. HtmlReaderPrivate(HtmlReader* reader);
  25. static void startDocument(void* c);
  26. static void endDocument(void* c);
  27. static void startElement(void* c, const xmlChar* name, const xmlChar** attrs);
  28. static void endElement(void* c, const xmlChar* name);
  29. static void comment(void* c, const xmlChar* value);
  30. static void cdataBlock(void* c, const xmlChar* value, int len);
  31. static void processingInstruction(void* c, const xmlChar* target, const xmlChar* data);
  32. static void characters(void* c, const xmlChar* ch, int len);
  33. static void ignorableWhitespace(void* c, const xmlChar* ch, int len);
  34. static void internalSubset(void* c, const xmlChar* name, const xmlChar* publicId, const xmlChar* systemId);
  35. static QString C2S(const xmlChar* text, int size = -1);
  36. static QString local(const QString &name);
  37. void parse(const QXmlInputSource* input);
  38. QScopedPointer<HtmlReaderLocator> locator;
  39. Q_DECLARE_PUBLIC(HtmlReader)
  40. HtmlReader* q_ptr;
  41. QXmlEntityResolver* entityresolver;
  42. QXmlDTDHandler* dtdhandler;
  43. QXmlContentHandler* contenthandler;
  44. QXmlErrorHandler* errorhandler;
  45. QXmlLexicalHandler* lexicalhandler;
  46. QXmlDeclHandler* declhandler;
  47. xmlParserCtxt* context;
  48. friend class HtmlReaderLocator;
  49. };
  50. HtmlReaderPrivate::HtmlReaderPrivate(HtmlReader* reader)
  51. : q_ptr(reader), entityresolver(0), dtdhandler(0), contenthandler(0), errorhandler(0), lexicalhandler(0), declhandler(0), context(0)
  52. {
  53. this->locator.reset(new HtmlReaderLocator(reader));
  54. }
  55. QString HtmlReaderPrivate::C2S(const xmlChar* text, int size)
  56. {
  57. return QString::fromLocal8Bit(reinterpret_cast<const char*>(text), size);
  58. }
  59. void HtmlReaderPrivate::parse(const QXmlInputSource* input)
  60. {
  61. htmlSAXHandler handler;
  62. QByteArray arr = input->data().toUtf8();
  63. std::memset(&handler, 0, sizeof(handler));
  64. handler.startDocument = &HtmlReaderPrivate::startDocument;
  65. handler.endDocument = &HtmlReaderPrivate::endDocument;
  66. handler.startElement = &HtmlReaderPrivate::startElement;
  67. handler.endElement = &HtmlReaderPrivate::endElement;
  68. handler.comment = &HtmlReaderPrivate::comment;
  69. handler.cdataBlock = &HtmlReaderPrivate::cdataBlock;
  70. handler.processingInstruction = &HtmlReaderPrivate::processingInstruction;
  71. handler.characters = &HtmlReaderPrivate::characters;
  72. handler.ignorableWhitespace = &HtmlReaderPrivate::ignorableWhitespace;
  73. handler.internalSubset = &HtmlReaderPrivate::internalSubset;
  74. this->context = htmlCreatePushParserCtxt(&handler, this, arr.constData(), arr.size(), "", XML_CHAR_ENCODING_UTF8);
  75. htmlParseChunk(this->context, NULL, 0, 1);
  76. htmlFreeParserCtxt(this->context);
  77. xmlCleanupParser();
  78. }
  79. void HtmlReaderPrivate::startDocument(void* c)
  80. {
  81. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  82. if (r->contenthandler) {
  83. r->contenthandler->startDocument();
  84. }
  85. }
  86. void HtmlReaderPrivate::endDocument(void* c)
  87. {
  88. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  89. if (r->contenthandler) {
  90. r->contenthandler->endDocument();
  91. }
  92. }
  93. QString HtmlReaderPrivate::local(const QString &name)
  94. {
  95. return name.mid(name.lastIndexOf(":"));
  96. }
  97. void HtmlReaderPrivate::startElement(void* c, const xmlChar* name, const xmlChar** attrs)
  98. {
  99. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  100. if (r->contenthandler) {
  101. QXmlAttributes a;
  102. if (attrs) {
  103. int i = 0;
  104. while (attrs[i]) {
  105. QString qName = C2S(attrs[i]);
  106. a.append(qName, "", local(qName), C2S(attrs[i+1]));
  107. i += 2;
  108. }
  109. }
  110. QString qName = C2S(name);
  111. r->contenthandler->startElement("", local(qName), qName, a);
  112. }
  113. }
  114. void HtmlReaderPrivate::endElement(void* c, const xmlChar* name)
  115. {
  116. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  117. if (r->contenthandler) {
  118. QString qName = C2S(name);
  119. r->contenthandler->endElement("", local(qName), qName);
  120. }
  121. }
  122. void HtmlReaderPrivate::comment(void* c, const xmlChar* value)
  123. {
  124. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  125. if (r->lexicalhandler) {
  126. r->lexicalhandler->comment(C2S(value));
  127. }
  128. }
  129. void HtmlReaderPrivate::cdataBlock(void* c, const xmlChar* value, int len)
  130. {
  131. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  132. if (r->lexicalhandler) {
  133. r->lexicalhandler->startCDATA();
  134. if (r->contenthandler) {
  135. r->contenthandler->characters(C2S(value, len));
  136. }
  137. r->lexicalhandler->endCDATA();
  138. }
  139. }
  140. void HtmlReaderPrivate::processingInstruction(void* c, const xmlChar* target, const xmlChar* data)
  141. {
  142. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  143. if (r->contenthandler) {
  144. r->contenthandler->processingInstruction(C2S(target), C2S(data));
  145. }
  146. }
  147. void HtmlReaderPrivate::characters(void* c, const xmlChar* ch, int len)
  148. {
  149. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  150. if (r->contenthandler) {
  151. r->contenthandler->characters(C2S(ch, len));
  152. }
  153. }
  154. void HtmlReaderPrivate::ignorableWhitespace(void* c, const xmlChar* ch, int len)
  155. {
  156. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  157. if (r->contenthandler) {
  158. r->contenthandler->ignorableWhitespace(C2S(ch, len));
  159. }
  160. }
  161. void HtmlReaderPrivate::internalSubset(void* c, const xmlChar* name, const xmlChar* publicId, const xmlChar* systemId)
  162. {
  163. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  164. if (r->lexicalhandler) {
  165. r->lexicalhandler->startDTD(C2S(name), C2S(publicId), C2S(systemId));
  166. r->lexicalhandler->endDTD();
  167. }
  168. }
  169. HtmlReader::HtmlReader(void)
  170. : d_ptr(new HtmlReaderPrivate(this))
  171. {
  172. }
  173. HtmlReader::~HtmlReader(void)
  174. {
  175. }
  176. bool HtmlReader::feature(const QString&, bool* ok) const
  177. {
  178. if (ok) {
  179. *ok = false;
  180. }
  181. return false;
  182. }
  183. void HtmlReader::setFeature(const QString&, bool)
  184. {
  185. }
  186. bool HtmlReader::hasFeature(const QString&) const
  187. {
  188. return false;
  189. }
  190. void* HtmlReader::property(const QString&, bool* ok) const
  191. {
  192. if (ok) {
  193. *ok = false;
  194. }
  195. return 0;
  196. }
  197. void HtmlReader::setProperty(const QString&, void*)
  198. {
  199. }
  200. bool HtmlReader::hasProperty(const QString&) const
  201. {
  202. return false;
  203. }
  204. void HtmlReader::setEntityResolver(QXmlEntityResolver* handler)
  205. {
  206. Q_D(HtmlReader);
  207. d->entityresolver = handler;
  208. }
  209. QXmlEntityResolver* HtmlReader::entityResolver(void) const
  210. {
  211. const HtmlReaderPrivate* d = this->d_func();
  212. return d->entityresolver;
  213. }
  214. void HtmlReader::setDTDHandler(QXmlDTDHandler* handler)
  215. {
  216. Q_D(HtmlReader);
  217. d->dtdhandler = handler;
  218. }
  219. QXmlDTDHandler* HtmlReader::DTDHandler(void) const
  220. {
  221. const HtmlReaderPrivate* d = this->d_func();
  222. return d->dtdhandler;
  223. }
  224. void HtmlReader::setContentHandler(QXmlContentHandler* handler)
  225. {
  226. Q_D(HtmlReader);
  227. d->contenthandler = handler;
  228. }
  229. QXmlContentHandler* HtmlReader::contentHandler(void) const
  230. {
  231. const HtmlReaderPrivate* d = this->d_func();
  232. return d->contenthandler;
  233. }
  234. void HtmlReader::setErrorHandler(QXmlErrorHandler* handler)
  235. {
  236. Q_D(HtmlReader);
  237. d->errorhandler = handler;
  238. }
  239. QXmlErrorHandler* HtmlReader::errorHandler(void) const
  240. {
  241. const HtmlReaderPrivate* d = this->d_func();
  242. return d->errorhandler;
  243. }
  244. void HtmlReader::setLexicalHandler(QXmlLexicalHandler* handler)
  245. {
  246. Q_D(HtmlReader);
  247. d->lexicalhandler = handler;
  248. }
  249. QXmlLexicalHandler* HtmlReader::lexicalHandler(void) const
  250. {
  251. const HtmlReaderPrivate* d = this->d_func();
  252. return d->lexicalhandler;
  253. }
  254. void HtmlReader::setDeclHandler(QXmlDeclHandler* handler)
  255. {
  256. Q_D(HtmlReader);
  257. d->declhandler = handler;
  258. }
  259. QXmlDeclHandler* HtmlReader::declHandler(void) const
  260. {
  261. const HtmlReaderPrivate* d = this->d_func();
  262. return d->declhandler;
  263. }
  264. bool HtmlReader::parse(const QXmlInputSource& input)
  265. {
  266. return this->parse(&input);
  267. }
  268. bool HtmlReader::parse(const QXmlInputSource* input)
  269. {
  270. Q_D(HtmlReader);
  271. if (d->contenthandler) {
  272. d->contenthandler->setDocumentLocator(d->locator.data());
  273. }
  274. d->parse(input);
  275. return true;
  276. }
  277. int HtmlReaderLocator::columnNumber(void) const
  278. {
  279. return this->reader->d_func()->context->input->col;
  280. }
  281. int HtmlReaderLocator::lineNumber(void) const
  282. {
  283. return this->reader->d_func()->context->input->line;
  284. }
  285. //---------------------------------------------------------------------------
  286. // XML2::XmlReader
  287. //---------------------------------------------------------------------------
  288. class XmlReaderLocator : public QXmlLocator {
  289. public:
  290. XmlReaderLocator(XmlReader* r) : reader(r) {}
  291. virtual int columnNumber(void) const;
  292. virtual int lineNumber(void) const;
  293. private:
  294. XmlReader* reader;
  295. };
  296. class XmlReaderPrivate {
  297. public:
  298. ~XmlReaderPrivate(void) {}
  299. private:
  300. XmlReaderPrivate(XmlReader* reader);
  301. static void onError(void *arg, const char *msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator);
  302. static int onRead(void * context, char * buffer, int len);
  303. static QString C2S(const xmlChar* text, int size = -1);
  304. bool parse(const QXmlInputSource* input);
  305. bool parse(QIODevice& input);
  306. void process(xmlTextReaderPtr reader);
  307. QScopedPointer<XmlReaderLocator> locator;
  308. Q_DECLARE_PUBLIC(XmlReader)
  309. XmlReader* q_ptr;
  310. QXmlEntityResolver* entityresolver;
  311. QXmlDTDHandler* dtdhandler;
  312. QXmlContentHandler* contenthandler;
  313. QXmlErrorHandler* errorhandler;
  314. QXmlLexicalHandler* lexicalhandler;
  315. QXmlDeclHandler* declhandler;
  316. xmlTextReaderPtr m_reader;
  317. friend class XmlReaderLocator;
  318. };
  319. XmlReaderPrivate::XmlReaderPrivate(XmlReader* reader)
  320. : q_ptr(reader), entityresolver(0), dtdhandler(0), contenthandler(0), errorhandler(0), lexicalhandler(0), declhandler(0), m_reader(0)
  321. {
  322. this->locator.reset(new XmlReaderLocator(reader));
  323. }
  324. QString XmlReaderPrivate::C2S(const xmlChar* text, int size)
  325. {
  326. return QString::fromLocal8Bit(reinterpret_cast<const char*>(text), size);
  327. }
  328. void XmlReaderPrivate::onError(void * arg, const char * msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator)
  329. {
  330. XmlReaderPrivate* r = reinterpret_cast<XmlReaderPrivate*>(arg);
  331. if (r->errorhandler) {
  332. QXmlParseException e(QString::fromLocal8Bit(msg), xmlTextReaderGetParserColumnNumber(r->m_reader), xmlTextReaderGetParserLineNumber(r->m_reader));
  333. switch (severity) {
  334. case XML_PARSER_SEVERITY_VALIDITY_WARNING: r->errorhandler->warning(e); break;
  335. case XML_PARSER_SEVERITY_VALIDITY_ERROR: r->errorhandler->error(e); break;
  336. case XML_PARSER_SEVERITY_WARNING: r->errorhandler->warning(e); break;
  337. case XML_PARSER_SEVERITY_ERROR: r->errorhandler->error(e); break;
  338. }
  339. }
  340. }
  341. void XmlReaderPrivate::process(xmlTextReaderPtr reader)
  342. {
  343. if (!contenthandler) return;
  344. switch (xmlTextReaderNodeType(reader)) {
  345. case XML_READER_TYPE_ELEMENT: {
  346. QString localName = C2S(xmlTextReaderConstLocalName(reader));
  347. QString qName = C2S(xmlTextReaderConstName(reader));
  348. bool empty = xmlTextReaderIsEmptyElement(reader);
  349. QXmlAttributes atts;
  350. while (xmlTextReaderMoveToNextAttribute(reader)) {
  351. QString localName = C2S(xmlTextReaderConstLocalName(reader));
  352. QString qName = C2S(xmlTextReaderConstName(reader));
  353. QString value = C2S(xmlTextReaderConstValue(reader));
  354. atts.append(qName, "", localName, value);
  355. }
  356. contenthandler->startElement("", localName, qName, atts);
  357. if (empty) contenthandler->endElement("", localName, qName);
  358. } break;
  359. case XML_READER_TYPE_TEXT: {
  360. QString value = C2S(xmlTextReaderConstValue(reader));
  361. contenthandler->characters(value);
  362. } break;
  363. case XML_READER_TYPE_END_ELEMENT: {
  364. QString localName = C2S(xmlTextReaderConstLocalName(reader));
  365. QString qName = C2S(xmlTextReaderConstName(reader));
  366. contenthandler->endElement("", localName, qName);
  367. } break;
  368. }
  369. }
  370. int XmlReaderPrivate::onRead(void * context, char * buffer, int len)
  371. {
  372. QIODevice *device = reinterpret_cast<QIODevice*>(context);
  373. return device->read(buffer, len);
  374. }
  375. bool XmlReaderPrivate::parse(const QXmlInputSource* input)
  376. {
  377. QByteArray arr = input->data().toUtf8();
  378. int options = XML_PARSE_RECOVER | XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET;
  379. m_reader = xmlReaderForMemory(arr.constData(), arr.size(), NULL, NULL, options);
  380. if (!m_reader) return false;
  381. xmlTextReaderSetErrorHandler(m_reader, &XmlReaderPrivate::onError, this);
  382. while (xmlTextReaderRead(m_reader) == 1) process(m_reader);
  383. xmlFreeTextReader(m_reader);
  384. return true;
  385. }
  386. bool XmlReaderPrivate::parse(QIODevice& input)
  387. {
  388. int options = XML_PARSE_RECOVER | XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET;
  389. m_reader = xmlReaderForIO(&XmlReaderPrivate::onRead, NULL, &input, NULL, NULL, options);
  390. if (!m_reader) return false;
  391. xmlTextReaderSetErrorHandler(m_reader, &XmlReaderPrivate::onError, this);
  392. while (xmlTextReaderRead(m_reader) == 1) process(m_reader);
  393. xmlFreeTextReader(m_reader);
  394. return true;
  395. }
  396. XmlReader::XmlReader(void)
  397. : d_ptr(new XmlReaderPrivate(this))
  398. {
  399. }
  400. XmlReader::~XmlReader(void)
  401. {
  402. }
  403. bool XmlReader::feature(const QString&, bool* ok) const
  404. {
  405. if (ok) *ok = false;
  406. return false;
  407. }
  408. void XmlReader::setFeature(const QString&, bool)
  409. {
  410. }
  411. bool XmlReader::hasFeature(const QString&) const
  412. {
  413. return false;
  414. }
  415. void* XmlReader::property(const QString&, bool* ok) const
  416. {
  417. if (ok) *ok = false;
  418. return 0;
  419. }
  420. void XmlReader::setProperty(const QString&, void*)
  421. {
  422. }
  423. bool XmlReader::hasProperty(const QString&) const
  424. {
  425. return false;
  426. }
  427. void XmlReader::setEntityResolver(QXmlEntityResolver* handler)
  428. {
  429. Q_D(XmlReader);
  430. d->entityresolver = handler;
  431. }
  432. QXmlEntityResolver* XmlReader::entityResolver(void) const
  433. {
  434. const XmlReaderPrivate* d = this->d_func();
  435. return d->entityresolver;
  436. }
  437. void XmlReader::setDTDHandler(QXmlDTDHandler* handler)
  438. {
  439. Q_D(XmlReader);
  440. d->dtdhandler = handler;
  441. }
  442. QXmlDTDHandler* XmlReader::DTDHandler(void) const
  443. {
  444. const XmlReaderPrivate* d = this->d_func();
  445. return d->dtdhandler;
  446. }
  447. void XmlReader::setContentHandler(QXmlContentHandler* handler)
  448. {
  449. Q_D(XmlReader);
  450. d->contenthandler = handler;
  451. }
  452. QXmlContentHandler* XmlReader::contentHandler(void) const
  453. {
  454. const XmlReaderPrivate* d = this->d_func();
  455. return d->contenthandler;
  456. }
  457. void XmlReader::setErrorHandler(QXmlErrorHandler* handler)
  458. {
  459. Q_D(XmlReader);
  460. d->errorhandler = handler;
  461. }
  462. QXmlErrorHandler* XmlReader::errorHandler(void) const
  463. {
  464. const XmlReaderPrivate* d = this->d_func();
  465. return d->errorhandler;
  466. }
  467. void XmlReader::setLexicalHandler(QXmlLexicalHandler* handler)
  468. {
  469. Q_D(XmlReader);
  470. d->lexicalhandler = handler;
  471. }
  472. QXmlLexicalHandler* XmlReader::lexicalHandler(void) const
  473. {
  474. const XmlReaderPrivate* d = this->d_func();
  475. return d->lexicalhandler;
  476. }
  477. void XmlReader::setDeclHandler(QXmlDeclHandler* handler)
  478. {
  479. Q_D(XmlReader);
  480. d->declhandler = handler;
  481. }
  482. QXmlDeclHandler* XmlReader::declHandler(void) const
  483. {
  484. const XmlReaderPrivate* d = this->d_func();
  485. return d->declhandler;
  486. }
  487. bool XmlReader::parse(const QXmlInputSource& input)
  488. {
  489. return this->parse(&input);
  490. }
  491. bool XmlReader::parse(const QXmlInputSource* input)
  492. {
  493. Q_D(XmlReader);
  494. if (d->contenthandler) {
  495. d->contenthandler->setDocumentLocator(d->locator.data());
  496. }
  497. d->parse(input);
  498. return true;
  499. }
  500. bool XmlReader::parse(QIODevice& input)
  501. {
  502. Q_D(XmlReader);
  503. if (d->contenthandler) {
  504. d->contenthandler->setDocumentLocator(d->locator.data());
  505. }
  506. d->parse(input);
  507. return true;
  508. }
  509. int XmlReaderLocator::columnNumber(void) const
  510. {
  511. return xmlTextReaderGetParserColumnNumber(this->reader->d_func()->m_reader);
  512. }
  513. int XmlReaderLocator::lineNumber(void) const
  514. {
  515. return xmlTextReaderGetParserLineNumber(this->reader->d_func()->m_reader);
  516. }
  517. } // namespace XML2