fb2xml2.cpp 16 KB


  1. #include "fb2xml2.h"
  2. #include <cstring>
  3. #include <libxml/tree.h>
  4. #include <libxml/parser.h>
  5. #include <libxml/HTMLparser.h>
  6. #include <libxml/xmlreader.h>
  7. namespace XML2 {
  8. //---------------------------------------------------------------------------
  9. // XML2::HtmlReader
  10. //---------------------------------------------------------------------------
  11. class HtmlReaderLocator : public QXmlLocator {
  12. public:
  13. HtmlReaderLocator(HtmlReader* r) : reader(r) {}
  14. virtual int columnNumber(void) const;
  15. virtual int lineNumber(void) const;
  16. private:
  17. HtmlReader* reader;
  18. };
  19. class HtmlReaderPrivate {
  20. public:
  21. ~HtmlReaderPrivate(void) {}
  22. private:
  23. HtmlReaderPrivate(HtmlReader* reader);
  24. static void startDocument(void* c);
  25. static void endDocument(void* c);
  26. static void startElement(void* c, const xmlChar* name, const xmlChar** attrs);
  27. static void endElement(void* c, const xmlChar* name);
  28. static void comment(void* c, const xmlChar* value);
  29. static void cdataBlock(void* c, const xmlChar* value, int len);
  30. static void processingInstruction(void* c, const xmlChar* target, const xmlChar* data);
  31. static void characters(void* c, const xmlChar* ch, int len);
  32. static void ignorableWhitespace(void* c, const xmlChar* ch, int len);
  33. static void internalSubset(void* c, const xmlChar* name, const xmlChar* publicId, const xmlChar* systemId);
  34. static QString C2S(const xmlChar* text, int size = -1);
  35. static QString local(const QString &name);
  36. void parse(const QXmlInputSource* input);
  37. QScopedPointer<HtmlReaderLocator> locator;
  38. Q_DECLARE_PUBLIC(HtmlReader)
  39. HtmlReader* q_ptr;
  40. QXmlEntityResolver* entityresolver;
  41. QXmlDTDHandler* dtdhandler;
  42. QXmlContentHandler* contenthandler;
  43. QXmlErrorHandler* errorhandler;
  44. QXmlLexicalHandler* lexicalhandler;
  45. QXmlDeclHandler* declhandler;
  46. xmlParserCtxt* context;
  47. friend class HtmlReaderLocator;
  48. };
  49. HtmlReaderPrivate::HtmlReaderPrivate(HtmlReader* reader)
  50. : q_ptr(reader), entityresolver(0), dtdhandler(0), contenthandler(0), errorhandler(0), lexicalhandler(0), declhandler(0), context(0)
  51. {
  52. this->locator.reset(new HtmlReaderLocator(reader));
  53. }
  54. QString HtmlReaderPrivate::C2S(const xmlChar* text, int size)
  55. {
  56. return QString::fromLocal8Bit(reinterpret_cast<const char*>(text), size);
  57. }
  58. void HtmlReaderPrivate::parse(const QXmlInputSource* input)
  59. {
  60. htmlSAXHandler handler;
  61. QByteArray arr = input->data().toUtf8();
  62. const char* data = arr.data();
  63. std::memset(&handler, 0, sizeof(handler));
  64. handler.startDocument = &HtmlReaderPrivate::startDocument;
  65. handler.endDocument = &HtmlReaderPrivate::endDocument;
  66. handler.startElement = &HtmlReaderPrivate::startElement;
  67. handler.endElement = &HtmlReaderPrivate::endElement;
  68. handler.comment = &HtmlReaderPrivate::comment;
  69. handler.cdataBlock = &HtmlReaderPrivate::cdataBlock;
  70. handler.processingInstruction = &HtmlReaderPrivate::processingInstruction;
  71. handler.characters = &HtmlReaderPrivate::characters;
  72. handler.ignorableWhitespace = &HtmlReaderPrivate::ignorableWhitespace;
  73. handler.internalSubset = &HtmlReaderPrivate::internalSubset;
  74. this->context = htmlCreatePushParserCtxt(&handler, this, data, xmlStrlen(reinterpret_cast<const xmlChar*>(data)), "", XML_CHAR_ENCODING_UTF8);
  75. htmlParseChunk(this->context, NULL, 0, 1);
  76. htmlFreeParserCtxt(this->context);
  77. xmlCleanupParser();
  78. }
  79. void HtmlReaderPrivate::startDocument(void* c)
  80. {
  81. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  82. if (r->contenthandler) {
  83. r->contenthandler->startDocument();
  84. }
  85. }
  86. void HtmlReaderPrivate::endDocument(void* c)
  87. {
  88. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  89. if (r->contenthandler) {
  90. r->contenthandler->endDocument();
  91. }
  92. }
  93. QString HtmlReaderPrivate::local(const QString &name)
  94. {
  95. return name.mid(name.lastIndexOf(":"));
  96. }
  97. void HtmlReaderPrivate::startElement(void* c, const xmlChar* name, const xmlChar** attrs)
  98. {
  99. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  100. if (r->contenthandler) {
  101. QXmlAttributes a;
  102. if (attrs) {
  103. int i = 0;
  104. while (attrs[i]) {
  105. QString qName = C2S(attrs[i]);
  106. a.append(qName, "", local(qName), C2S(attrs[i+1]));
  107. i += 2;
  108. }
  109. }
  110. QString qName = C2S(name);
  111. r->contenthandler->startElement("", local(qName), qName, a);
  112. }
  113. }
  114. void HtmlReaderPrivate::endElement(void* c, const xmlChar* name)
  115. {
  116. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  117. if (r->contenthandler) {
  118. QString qName = C2S(name);
  119. r->contenthandler->endElement("", local(qName), qName);
  120. }
  121. }
  122. void HtmlReaderPrivate::comment(void* c, const xmlChar* value)
  123. {
  124. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  125. if (r->lexicalhandler) {
  126. r->lexicalhandler->comment(C2S(value));
  127. }
  128. }
  129. void HtmlReaderPrivate::cdataBlock(void* c, const xmlChar* value, int len)
  130. {
  131. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  132. if (r->lexicalhandler) {
  133. r->lexicalhandler->startCDATA();
  134. if (r->contenthandler) {
  135. r->contenthandler->characters(C2S(value, len));
  136. }
  137. r->lexicalhandler->endCDATA();
  138. }
  139. }
  140. void HtmlReaderPrivate::processingInstruction(void* c, const xmlChar* target, const xmlChar* data)
  141. {
  142. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  143. if (r->contenthandler) {
  144. r->contenthandler->processingInstruction(C2S(target), C2S(data));
  145. }
  146. }
  147. void HtmlReaderPrivate::characters(void* c, const xmlChar* ch, int len)
  148. {
  149. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  150. if (r->contenthandler) {
  151. r->contenthandler->characters(C2S(ch, len));
  152. }
  153. }
  154. void HtmlReaderPrivate::ignorableWhitespace(void* c, const xmlChar* ch, int len)
  155. {
  156. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  157. if (r->contenthandler) {
  158. r->contenthandler->ignorableWhitespace(C2S(ch, len));
  159. }
  160. }
  161. void HtmlReaderPrivate::internalSubset(void* c, const xmlChar* name, const xmlChar* publicId, const xmlChar* systemId)
  162. {
  163. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  164. if (r->lexicalhandler) {
  165. r->lexicalhandler->startDTD(C2S(name), C2S(publicId), C2S(systemId));
  166. r->lexicalhandler->endDTD();
  167. }
  168. }
  169. HtmlReader::HtmlReader(void)
  170. : d_ptr(new HtmlReaderPrivate(this))
  171. {
  172. }
  173. HtmlReader::~HtmlReader(void)
  174. {
  175. }
  176. bool HtmlReader::feature(const QString&, bool* ok) const
  177. {
  178. if (ok) {
  179. *ok = false;
  180. }
  181. return false;
  182. }
  183. void HtmlReader::setFeature(const QString&, bool)
  184. {
  185. }
  186. bool HtmlReader::hasFeature(const QString&) const
  187. {
  188. return false;
  189. }
  190. void* HtmlReader::property(const QString&, bool* ok) const
  191. {
  192. if (ok) {
  193. *ok = false;
  194. }
  195. return 0;
  196. }
  197. void HtmlReader::setProperty(const QString&, void*)
  198. {
  199. }
  200. bool HtmlReader::hasProperty(const QString&) const
  201. {
  202. return false;
  203. }
  204. void HtmlReader::setEntityResolver(QXmlEntityResolver* handler)
  205. {
  206. Q_D(HtmlReader);
  207. d->entityresolver = handler;
  208. }
  209. QXmlEntityResolver* HtmlReader::entityResolver(void) const
  210. {
  211. const HtmlReaderPrivate* d = this->d_func();
  212. return d->entityresolver;
  213. }
  214. void HtmlReader::setDTDHandler(QXmlDTDHandler* handler)
  215. {
  216. Q_D(HtmlReader);
  217. d->dtdhandler = handler;
  218. }
  219. QXmlDTDHandler* HtmlReader::DTDHandler(void) const
  220. {
  221. const HtmlReaderPrivate* d = this->d_func();
  222. return d->dtdhandler;
  223. }
  224. void HtmlReader::setContentHandler(QXmlContentHandler* handler)
  225. {
  226. Q_D(HtmlReader);
  227. d->contenthandler = handler;
  228. }
  229. QXmlContentHandler* HtmlReader::contentHandler(void) const
  230. {
  231. const HtmlReaderPrivate* d = this->d_func();
  232. return d->contenthandler;
  233. }
  234. void HtmlReader::setErrorHandler(QXmlErrorHandler* handler)
  235. {
  236. Q_D(HtmlReader);
  237. d->errorhandler = handler;
  238. }
  239. QXmlErrorHandler* HtmlReader::errorHandler(void) const
  240. {
  241. const HtmlReaderPrivate* d = this->d_func();
  242. return d->errorhandler;
  243. }
  244. void HtmlReader::setLexicalHandler(QXmlLexicalHandler* handler)
  245. {
  246. Q_D(HtmlReader);
  247. d->lexicalhandler = handler;
  248. }
  249. QXmlLexicalHandler* HtmlReader::lexicalHandler(void) const
  250. {
  251. const HtmlReaderPrivate* d = this->d_func();
  252. return d->lexicalhandler;
  253. }
  254. void HtmlReader::setDeclHandler(QXmlDeclHandler* handler)
  255. {
  256. Q_D(HtmlReader);
  257. d->declhandler = handler;
  258. }
  259. QXmlDeclHandler* HtmlReader::declHandler(void) const
  260. {
  261. const HtmlReaderPrivate* d = this->d_func();
  262. return d->declhandler;
  263. }
  264. bool HtmlReader::parse(const QXmlInputSource& input)
  265. {
  266. return this->parse(&input);
  267. }
  268. bool HtmlReader::parse(const QXmlInputSource* input)
  269. {
  270. Q_D(HtmlReader);
  271. if (d->contenthandler) {
  272. d->contenthandler->setDocumentLocator(d->locator.data());
  273. }
  274. d->parse(input);
  275. return true;
  276. }
  277. int HtmlReaderLocator::columnNumber(void) const
  278. {
  279. return this->reader->d_func()->context->input->col;
  280. }
  281. int HtmlReaderLocator::lineNumber(void) const
  282. {
  283. return this->reader->d_func()->context->input->line;
  284. }
//---------------------------------------------------------------------------
// XML2::XmlReader
//---------------------------------------------------------------------------
  288. class XmlReaderLocator : public QXmlLocator {
  289. public:
  290. XmlReaderLocator(XmlReader* r) : reader(r) {}
  291. virtual int columnNumber(void) const;
  292. virtual int lineNumber(void) const;
  293. private:
  294. XmlReader* reader;
  295. };
  296. class XmlReaderPrivate {
  297. public:
  298. ~XmlReaderPrivate(void) {}
  299. private:
  300. XmlReaderPrivate(XmlReader* reader);
  301. static void onError(void *arg, const char *msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator);
  302. static int onRead(void * context, char * buffer, int len);
  303. static QString C2S(const xmlChar* text, int size = -1);
  304. bool parse(QIODevice& input);
  305. void process(xmlTextReaderPtr reader);
  306. QScopedPointer<XmlReaderLocator> locator;
  307. Q_DECLARE_PUBLIC(XmlReader)
  308. XmlReader* q_ptr;
  309. QXmlEntityResolver* entityresolver;
  310. QXmlDTDHandler* dtdhandler;
  311. QXmlContentHandler* contenthandler;
  312. QXmlErrorHandler* errorhandler;
  313. QXmlLexicalHandler* lexicalhandler;
  314. QXmlDeclHandler* declhandler;
  315. xmlTextReaderPtr m_reader;
  316. friend class XmlReaderLocator;
  317. };
  318. XmlReaderPrivate::XmlReaderPrivate(XmlReader* reader)
  319. : q_ptr(reader), entityresolver(0), dtdhandler(0), contenthandler(0), errorhandler(0), lexicalhandler(0), declhandler(0), m_reader(0)
  320. {
  321. this->locator.reset(new XmlReaderLocator(reader));
  322. }
  323. QString XmlReaderPrivate::C2S(const xmlChar* text, int size)
  324. {
  325. return QString::fromLocal8Bit(reinterpret_cast<const char*>(text), size);
  326. }
  327. void XmlReaderPrivate::onError(void * arg, const char * msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator)
  328. {
  329. XmlReaderPrivate* r = reinterpret_cast<XmlReaderPrivate*>(arg);
  330. if (r->errorhandler) {
  331. QXmlParseException e(QString::fromLocal8Bit(msg), xmlTextReaderGetParserColumnNumber(r->m_reader), xmlTextReaderGetParserLineNumber(r->m_reader));
  332. switch (severity) {
  333. case XML_PARSER_SEVERITY_VALIDITY_WARNING: r->errorhandler->warning(e); break;
  334. case XML_PARSER_SEVERITY_VALIDITY_ERROR: r->errorhandler->error(e); break;
  335. case XML_PARSER_SEVERITY_WARNING: r->errorhandler->warning(e); break;
  336. case XML_PARSER_SEVERITY_ERROR: r->errorhandler->error(e); break;
  337. }
  338. }
  339. }
  340. void XmlReaderPrivate::process(xmlTextReaderPtr reader)
  341. {
  342. if (!contenthandler) return;
  343. switch (xmlTextReaderNodeType(reader)) {
  344. case XML_READER_TYPE_ELEMENT: {
  345. QString localName = C2S(xmlTextReaderConstLocalName(reader));
  346. QString qName = C2S(xmlTextReaderConstName(reader));
  347. bool empty = xmlTextReaderIsEmptyElement(reader);
  348. QXmlAttributes atts;
  349. while (xmlTextReaderMoveToNextAttribute(reader)) {
  350. QString localName = C2S(xmlTextReaderConstLocalName(reader));
  351. QString qName = C2S(xmlTextReaderConstName(reader));
  352. QString value = C2S(xmlTextReaderConstValue(reader));
  353. atts.append(qName, "", localName, value);
  354. }
  355. contenthandler->startElement("", localName, qName, atts);
  356. if (empty) contenthandler->endElement("", localName, qName);
  357. } break;
  358. case XML_READER_TYPE_TEXT: {
  359. QString value = C2S(xmlTextReaderConstValue(reader));
  360. contenthandler->characters(value);
  361. } break;
  362. case XML_READER_TYPE_END_ELEMENT: {
  363. QString localName = C2S(xmlTextReaderConstLocalName(reader));
  364. QString qName = C2S(xmlTextReaderConstName(reader));
  365. contenthandler->endElement("", localName, qName);
  366. } break;
  367. }
  368. }
  369. int XmlReaderPrivate::onRead(void * context, char * buffer, int len)
  370. {
  371. QIODevice *device = reinterpret_cast<QIODevice*>(context);
  372. return device->read(buffer, len);
  373. }
  374. bool XmlReaderPrivate::parse(QIODevice& input)
  375. {
  376. int options = XML_PARSE_RECOVER | XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET;
  377. m_reader = xmlReaderForIO(&XmlReaderPrivate::onRead, NULL, &input, NULL, NULL, options);
  378. if (!m_reader) return false;
  379. xmlTextReaderSetErrorHandler(m_reader, &XmlReaderPrivate::onError, this);
  380. while (xmlTextReaderRead(m_reader) == 1) process(m_reader);
  381. xmlFreeTextReader(m_reader);
  382. return true;
  383. }
  384. XmlReader::XmlReader(void)
  385. : d_ptr(new XmlReaderPrivate(this))
  386. {
  387. }
  388. XmlReader::~XmlReader(void)
  389. {
  390. }
  391. bool XmlReader::feature(const QString&, bool* ok) const
  392. {
  393. if (ok) *ok = false;
  394. return false;
  395. }
  396. void XmlReader::setFeature(const QString&, bool)
  397. {
  398. }
  399. bool XmlReader::hasFeature(const QString&) const
  400. {
  401. return false;
  402. }
  403. void* XmlReader::property(const QString&, bool* ok) const
  404. {
  405. if (ok) *ok = false;
  406. return 0;
  407. }
  408. void XmlReader::setProperty(const QString&, void*)
  409. {
  410. }
  411. bool XmlReader::hasProperty(const QString&) const
  412. {
  413. return false;
  414. }
  415. void XmlReader::setEntityResolver(QXmlEntityResolver* handler)
  416. {
  417. Q_D(XmlReader);
  418. d->entityresolver = handler;
  419. }
  420. QXmlEntityResolver* XmlReader::entityResolver(void) const
  421. {
  422. const XmlReaderPrivate* d = this->d_func();
  423. return d->entityresolver;
  424. }
  425. void XmlReader::setDTDHandler(QXmlDTDHandler* handler)
  426. {
  427. Q_D(XmlReader);
  428. d->dtdhandler = handler;
  429. }
  430. QXmlDTDHandler* XmlReader::DTDHandler(void) const
  431. {
  432. const XmlReaderPrivate* d = this->d_func();
  433. return d->dtdhandler;
  434. }
  435. void XmlReader::setContentHandler(QXmlContentHandler* handler)
  436. {
  437. Q_D(XmlReader);
  438. d->contenthandler = handler;
  439. }
  440. QXmlContentHandler* XmlReader::contentHandler(void) const
  441. {
  442. const XmlReaderPrivate* d = this->d_func();
  443. return d->contenthandler;
  444. }
  445. void XmlReader::setErrorHandler(QXmlErrorHandler* handler)
  446. {
  447. Q_D(XmlReader);
  448. d->errorhandler = handler;
  449. }
  450. QXmlErrorHandler* XmlReader::errorHandler(void) const
  451. {
  452. const XmlReaderPrivate* d = this->d_func();
  453. return d->errorhandler;
  454. }
  455. void XmlReader::setLexicalHandler(QXmlLexicalHandler* handler)
  456. {
  457. Q_D(XmlReader);
  458. d->lexicalhandler = handler;
  459. }
  460. QXmlLexicalHandler* XmlReader::lexicalHandler(void) const
  461. {
  462. const XmlReaderPrivate* d = this->d_func();
  463. return d->lexicalhandler;
  464. }
  465. void XmlReader::setDeclHandler(QXmlDeclHandler* handler)
  466. {
  467. Q_D(XmlReader);
  468. d->declhandler = handler;
  469. }
  470. QXmlDeclHandler* XmlReader::declHandler(void) const
  471. {
  472. const XmlReaderPrivate* d = this->d_func();
  473. return d->declhandler;
  474. }
  475. bool XmlReader::parse(QIODevice& input)
  476. {
  477. Q_D(XmlReader);
  478. if (d->contenthandler) {
  479. d->contenthandler->setDocumentLocator(d->locator.data());
  480. }
  481. d->parse(input);
  482. return true;
  483. }
  484. int XmlReaderLocator::columnNumber(void) const
  485. {
  486. return xmlTextReaderGetParserColumnNumber(this->reader->d_func()->m_reader);
  487. }
  488. int XmlReaderLocator::lineNumber(void) const
  489. {
  490. return xmlTextReaderGetParserLineNumber(this->reader->d_func()->m_reader);
  491. }
  492. } // namespace XML2