fb2xml2.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653
  1. #include "fb2xml2.h"
  2. #include <cstring>
  3. #include <libxml/tree.h>
  4. #include <libxml/parser.h>
  5. #include <libxml/HTMLparser.h>
  6. #include <libxml/xmlreader.h>
  7. #include <QtDebug>
  8. namespace XML2 {
  9. //---------------------------------------------------------------------------
  10. // XML2::HtmlReader
  11. //---------------------------------------------------------------------------
  12. class HtmlReaderLocator : public QXmlLocator {
  13. public:
  14. HtmlReaderLocator(HtmlReader* r) : reader(r) {}
  15. virtual int columnNumber(void) const;
  16. virtual int lineNumber(void) const;
  17. private:
  18. HtmlReader* reader;
  19. };
  20. class HtmlReaderPrivate
  21. {
  22. private:
  23. class ClosedTag : public QList<QString> { public: ClosedTag(); };
  24. public:
  25. ~HtmlReaderPrivate(void) {}
  26. private:
  27. HtmlReaderPrivate(HtmlReader* reader);
  28. static void startDocument(void* c);
  29. static void endDocument(void* c);
  30. static void startElement(void* c, const xmlChar* name, const xmlChar** attrs);
  31. static void endElement(void* c, const xmlChar* name);
  32. static void comment(void* c, const xmlChar* value);
  33. static void cdataBlock(void* c, const xmlChar* value, int len);
  34. static void processingInstruction(void* c, const xmlChar* target, const xmlChar* data);
  35. static void characters(void* c, const xmlChar* ch, int len);
  36. static void ignorableWhitespace(void* c, const xmlChar* ch, int len);
  37. static void internalSubset(void* c, const xmlChar* name, const xmlChar* publicId, const xmlChar* systemId);
  38. static QString C2S(const xmlChar* text, int size = -1);
  39. static QString local(const QString &name);
  40. void parse(const QXmlInputSource* input);
  41. QScopedPointer<HtmlReaderLocator> locator;
  42. Q_DECLARE_PUBLIC(HtmlReader)
  43. HtmlReader* q_ptr;
  44. QXmlEntityResolver* entityresolver;
  45. QXmlDTDHandler* dtdhandler;
  46. QXmlContentHandler* contenthandler;
  47. QXmlErrorHandler* errorhandler;
  48. QXmlLexicalHandler* lexicalhandler;
  49. QXmlDeclHandler* declhandler;
  50. xmlParserCtxt* context;
  51. QList<QString> closed;
  52. friend class HtmlReaderLocator;
  53. };
  54. HtmlReaderPrivate::HtmlReaderPrivate(HtmlReader* reader)
  55. : q_ptr(reader), entityresolver(0), dtdhandler(0), contenthandler(0), errorhandler(0), lexicalhandler(0), declhandler(0), context(0)
  56. {
  57. this->locator.reset(new HtmlReaderLocator(reader));
  58. }
  59. HtmlReaderPrivate::ClosedTag::ClosedTag()
  60. {
  61. *this << "area";
  62. *this << "base";
  63. *this << "br";
  64. *this << "col";
  65. *this << "command";
  66. *this << "embed";
  67. *this << "hr";
  68. *this << "img";
  69. *this << "input";
  70. *this << "keygen";
  71. *this << "link";
  72. *this << "meta";
  73. *this << "param";
  74. *this << "source";
  75. *this << "track";
  76. *this << "wbr";
  77. }
  78. QString HtmlReaderPrivate::C2S(const xmlChar* text, int size)
  79. {
  80. return QString::fromLocal8Bit(reinterpret_cast<const char*>(text), size);
  81. }
  82. void HtmlReaderPrivate::parse(const QXmlInputSource* input)
  83. {
  84. htmlSAXHandler handler;
  85. QByteArray arr = input->data().toUtf8();
  86. std::memset(&handler, 0, sizeof(handler));
  87. handler.startDocument = &HtmlReaderPrivate::startDocument;
  88. handler.endDocument = &HtmlReaderPrivate::endDocument;
  89. handler.startElement = &HtmlReaderPrivate::startElement;
  90. handler.endElement = &HtmlReaderPrivate::endElement;
  91. handler.comment = &HtmlReaderPrivate::comment;
  92. handler.cdataBlock = &HtmlReaderPrivate::cdataBlock;
  93. handler.processingInstruction = &HtmlReaderPrivate::processingInstruction;
  94. handler.characters = &HtmlReaderPrivate::characters;
  95. handler.ignorableWhitespace = &HtmlReaderPrivate::ignorableWhitespace;
  96. handler.internalSubset = &HtmlReaderPrivate::internalSubset;
  97. this->context = htmlCreatePushParserCtxt(&handler, this, arr.constData(), arr.size(), "", XML_CHAR_ENCODING_UTF8);
  98. htmlParseChunk(this->context, NULL, 0, 1);
  99. htmlFreeParserCtxt(this->context);
  100. xmlCleanupParser();
  101. }
  102. void HtmlReaderPrivate::startDocument(void* c)
  103. {
  104. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  105. if (r->contenthandler) {
  106. r->contenthandler->startDocument();
  107. }
  108. }
  109. void HtmlReaderPrivate::endDocument(void* c)
  110. {
  111. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  112. if (r->contenthandler) {
  113. r->contenthandler->endDocument();
  114. }
  115. }
  116. QString HtmlReaderPrivate::local(const QString &name)
  117. {
  118. return name.mid(name.lastIndexOf(":"));
  119. }
  120. void HtmlReaderPrivate::startElement(void* c, const xmlChar* name, const xmlChar** attrs)
  121. {
  122. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  123. if (r->contenthandler) {
  124. QXmlAttributes a;
  125. if (attrs) {
  126. int i = 0;
  127. while (attrs[i]) {
  128. QString qName = C2S(attrs[i]);
  129. a.append(qName, "", local(qName), C2S(attrs[i+1]));
  130. i += 2;
  131. }
  132. }
  133. static ClosedTag closed;
  134. QString qName = C2S(name);
  135. QString localName = local(qName);
  136. r->contenthandler->startElement("", localName, qName, a);
  137. if (closed.indexOf(qName.toLower()) != -1) {
  138. r->contenthandler->endElement("", localName, qName);
  139. }
  140. }
  141. }
  142. void HtmlReaderPrivate::endElement(void* c, const xmlChar* name)
  143. {
  144. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  145. if (r->contenthandler) {
  146. QString qName = C2S(name);
  147. r->contenthandler->endElement("", local(qName), qName);
  148. }
  149. }
  150. void HtmlReaderPrivate::comment(void* c, const xmlChar* value)
  151. {
  152. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  153. if (r->lexicalhandler) {
  154. r->lexicalhandler->comment(C2S(value));
  155. }
  156. }
  157. void HtmlReaderPrivate::cdataBlock(void* c, const xmlChar* value, int len)
  158. {
  159. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  160. if (r->lexicalhandler) {
  161. r->lexicalhandler->startCDATA();
  162. if (r->contenthandler) {
  163. r->contenthandler->characters(C2S(value, len));
  164. }
  165. r->lexicalhandler->endCDATA();
  166. }
  167. }
  168. void HtmlReaderPrivate::processingInstruction(void* c, const xmlChar* target, const xmlChar* data)
  169. {
  170. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  171. if (r->contenthandler) {
  172. r->contenthandler->processingInstruction(C2S(target), C2S(data));
  173. }
  174. }
  175. void HtmlReaderPrivate::characters(void* c, const xmlChar* ch, int len)
  176. {
  177. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  178. if (r->contenthandler) {
  179. r->contenthandler->characters(C2S(ch, len));
  180. }
  181. }
  182. void HtmlReaderPrivate::ignorableWhitespace(void* c, const xmlChar* ch, int len)
  183. {
  184. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  185. if (r->contenthandler) {
  186. r->contenthandler->ignorableWhitespace(C2S(ch, len));
  187. }
  188. }
  189. void HtmlReaderPrivate::internalSubset(void* c, const xmlChar* name, const xmlChar* publicId, const xmlChar* systemId)
  190. {
  191. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  192. if (r->lexicalhandler) {
  193. r->lexicalhandler->startDTD(C2S(name), C2S(publicId), C2S(systemId));
  194. r->lexicalhandler->endDTD();
  195. }
  196. }
  197. HtmlReader::HtmlReader(void)
  198. : d_ptr(new HtmlReaderPrivate(this))
  199. {
  200. }
  201. HtmlReader::~HtmlReader(void)
  202. {
  203. }
  204. bool HtmlReader::feature(const QString&, bool* ok) const
  205. {
  206. if (ok) {
  207. *ok = false;
  208. }
  209. return false;
  210. }
  211. void HtmlReader::setFeature(const QString&, bool)
  212. {
  213. }
  214. bool HtmlReader::hasFeature(const QString&) const
  215. {
  216. return false;
  217. }
  218. void* HtmlReader::property(const QString&, bool* ok) const
  219. {
  220. if (ok) {
  221. *ok = false;
  222. }
  223. return 0;
  224. }
  225. void HtmlReader::setProperty(const QString&, void*)
  226. {
  227. }
  228. bool HtmlReader::hasProperty(const QString&) const
  229. {
  230. return false;
  231. }
  232. void HtmlReader::setEntityResolver(QXmlEntityResolver* handler)
  233. {
  234. Q_D(HtmlReader);
  235. d->entityresolver = handler;
  236. }
  237. QXmlEntityResolver* HtmlReader::entityResolver(void) const
  238. {
  239. const HtmlReaderPrivate* d = this->d_func();
  240. return d->entityresolver;
  241. }
  242. void HtmlReader::setDTDHandler(QXmlDTDHandler* handler)
  243. {
  244. Q_D(HtmlReader);
  245. d->dtdhandler = handler;
  246. }
  247. QXmlDTDHandler* HtmlReader::DTDHandler(void) const
  248. {
  249. const HtmlReaderPrivate* d = this->d_func();
  250. return d->dtdhandler;
  251. }
  252. void HtmlReader::setContentHandler(QXmlContentHandler* handler)
  253. {
  254. Q_D(HtmlReader);
  255. d->contenthandler = handler;
  256. }
  257. QXmlContentHandler* HtmlReader::contentHandler(void) const
  258. {
  259. const HtmlReaderPrivate* d = this->d_func();
  260. return d->contenthandler;
  261. }
  262. void HtmlReader::setErrorHandler(QXmlErrorHandler* handler)
  263. {
  264. Q_D(HtmlReader);
  265. d->errorhandler = handler;
  266. }
  267. QXmlErrorHandler* HtmlReader::errorHandler(void) const
  268. {
  269. const HtmlReaderPrivate* d = this->d_func();
  270. return d->errorhandler;
  271. }
  272. void HtmlReader::setLexicalHandler(QXmlLexicalHandler* handler)
  273. {
  274. Q_D(HtmlReader);
  275. d->lexicalhandler = handler;
  276. }
  277. QXmlLexicalHandler* HtmlReader::lexicalHandler(void) const
  278. {
  279. const HtmlReaderPrivate* d = this->d_func();
  280. return d->lexicalhandler;
  281. }
  282. void HtmlReader::setDeclHandler(QXmlDeclHandler* handler)
  283. {
  284. Q_D(HtmlReader);
  285. d->declhandler = handler;
  286. }
  287. QXmlDeclHandler* HtmlReader::declHandler(void) const
  288. {
  289. const HtmlReaderPrivate* d = this->d_func();
  290. return d->declhandler;
  291. }
  292. bool HtmlReader::parse(const QXmlInputSource& input)
  293. {
  294. return this->parse(&input);
  295. }
  296. bool HtmlReader::parse(const QXmlInputSource* input)
  297. {
  298. Q_D(HtmlReader);
  299. if (d->contenthandler) {
  300. d->contenthandler->setDocumentLocator(d->locator.data());
  301. }
  302. d->parse(input);
  303. return true;
  304. }
  305. int HtmlReaderLocator::columnNumber(void) const
  306. {
  307. return this->reader->d_func()->context->input->col;
  308. }
  309. int HtmlReaderLocator::lineNumber(void) const
  310. {
  311. return this->reader->d_func()->context->input->line;
  312. }
  //---------------------------------------------------------------------------
  // XML2::XmlReader
  //---------------------------------------------------------------------------
  316. class XmlReaderLocator : public QXmlLocator {
  317. public:
  318. XmlReaderLocator(XmlReader* r) : reader(r) {}
  319. virtual int columnNumber(void) const;
  320. virtual int lineNumber(void) const;
  321. private:
  322. XmlReader* reader;
  323. };
  324. class XmlReaderPrivate {
  325. public:
  326. ~XmlReaderPrivate(void) {}
  327. private:
  328. XmlReaderPrivate(XmlReader* reader);
  329. static void onError(void *arg, const char *msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator);
  330. static int onRead(void * context, char * buffer, int len);
  331. static QString C2S(const xmlChar* text, int size = -1);
  332. bool parse(const QXmlInputSource* input);
  333. bool parse(QIODevice& input);
  334. void process(xmlTextReaderPtr reader);
  335. QScopedPointer<XmlReaderLocator> locator;
  336. Q_DECLARE_PUBLIC(XmlReader)
  337. XmlReader* q_ptr;
  338. QXmlEntityResolver* entityresolver;
  339. QXmlDTDHandler* dtdhandler;
  340. QXmlContentHandler* contenthandler;
  341. QXmlErrorHandler* errorhandler;
  342. QXmlLexicalHandler* lexicalhandler;
  343. QXmlDeclHandler* declhandler;
  344. xmlTextReaderPtr m_reader;
  345. friend class XmlReaderLocator;
  346. };
  347. XmlReaderPrivate::XmlReaderPrivate(XmlReader* reader)
  348. : q_ptr(reader), entityresolver(0), dtdhandler(0), contenthandler(0), errorhandler(0), lexicalhandler(0), declhandler(0), m_reader(0)
  349. {
  350. this->locator.reset(new XmlReaderLocator(reader));
  351. }
  352. QString XmlReaderPrivate::C2S(const xmlChar* text, int size)
  353. {
  354. return QString::fromLocal8Bit(reinterpret_cast<const char*>(text), size);
  355. }
  356. void XmlReaderPrivate::onError(void * arg, const char * msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator)
  357. {
  358. XmlReaderPrivate* r = reinterpret_cast<XmlReaderPrivate*>(arg);
  359. if (r->errorhandler) {
  360. QXmlParseException e(QString::fromLocal8Bit(msg), xmlTextReaderGetParserColumnNumber(r->m_reader), xmlTextReaderGetParserLineNumber(r->m_reader));
  361. switch (severity) {
  362. case XML_PARSER_SEVERITY_VALIDITY_WARNING: r->errorhandler->warning(e); break;
  363. case XML_PARSER_SEVERITY_VALIDITY_ERROR: r->errorhandler->error(e); break;
  364. case XML_PARSER_SEVERITY_WARNING: r->errorhandler->warning(e); break;
  365. case XML_PARSER_SEVERITY_ERROR: r->errorhandler->error(e); break;
  366. }
  367. }
  368. }
  369. void XmlReaderPrivate::process(xmlTextReaderPtr reader)
  370. {
  371. if (!contenthandler) return;
  372. switch (xmlTextReaderNodeType(reader)) {
  373. case XML_READER_TYPE_ELEMENT: {
  374. QString localName = C2S(xmlTextReaderConstLocalName(reader));
  375. QString qName = C2S(xmlTextReaderConstName(reader));
  376. bool empty = xmlTextReaderIsEmptyElement(reader);
  377. QXmlAttributes atts;
  378. while (xmlTextReaderMoveToNextAttribute(reader)) {
  379. QString localName = C2S(xmlTextReaderConstLocalName(reader));
  380. QString qName = C2S(xmlTextReaderConstName(reader));
  381. QString value = C2S(xmlTextReaderConstValue(reader));
  382. atts.append(qName, "", localName, value);
  383. }
  384. contenthandler->startElement("", localName, qName, atts);
  385. if (empty) contenthandler->endElement("", localName, qName);
  386. } break;
  387. case XML_READER_TYPE_TEXT: {
  388. QString value = C2S(xmlTextReaderConstValue(reader));
  389. contenthandler->characters(value);
  390. } break;
  391. case XML_READER_TYPE_END_ELEMENT: {
  392. QString localName = C2S(xmlTextReaderConstLocalName(reader));
  393. QString qName = C2S(xmlTextReaderConstName(reader));
  394. contenthandler->endElement("", localName, qName);
  395. } break;
  396. }
  397. }
  398. int XmlReaderPrivate::onRead(void * context, char * buffer, int len)
  399. {
  400. QIODevice *device = reinterpret_cast<QIODevice*>(context);
  401. return device->read(buffer, len);
  402. }
  403. bool XmlReaderPrivate::parse(const QXmlInputSource* input)
  404. {
  405. QByteArray arr = input->data().toUtf8();
  406. int options = XML_PARSE_RECOVER | XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET;
  407. m_reader = xmlReaderForMemory(arr.constData(), arr.size(), NULL, NULL, options);
  408. if (!m_reader) return false;
  409. xmlTextReaderSetErrorHandler(m_reader, &XmlReaderPrivate::onError, this);
  410. while (xmlTextReaderRead(m_reader) == 1) process(m_reader);
  411. xmlFreeTextReader(m_reader);
  412. return true;
  413. }
  414. bool XmlReaderPrivate::parse(QIODevice& input)
  415. {
  416. int options = XML_PARSE_RECOVER | XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET;
  417. m_reader = xmlReaderForIO(&XmlReaderPrivate::onRead, NULL, &input, NULL, NULL, options);
  418. if (!m_reader) return false;
  419. xmlTextReaderSetErrorHandler(m_reader, &XmlReaderPrivate::onError, this);
  420. while (xmlTextReaderRead(m_reader) == 1) process(m_reader);
  421. xmlFreeTextReader(m_reader);
  422. return true;
  423. }
  424. XmlReader::XmlReader(void)
  425. : d_ptr(new XmlReaderPrivate(this))
  426. {
  427. }
  428. XmlReader::~XmlReader(void)
  429. {
  430. }
  431. bool XmlReader::feature(const QString&, bool* ok) const
  432. {
  433. if (ok) *ok = false;
  434. return false;
  435. }
  436. void XmlReader::setFeature(const QString&, bool)
  437. {
  438. }
  439. bool XmlReader::hasFeature(const QString&) const
  440. {
  441. return false;
  442. }
  443. void* XmlReader::property(const QString&, bool* ok) const
  444. {
  445. if (ok) *ok = false;
  446. return 0;
  447. }
  448. void XmlReader::setProperty(const QString&, void*)
  449. {
  450. }
  451. bool XmlReader::hasProperty(const QString&) const
  452. {
  453. return false;
  454. }
  455. void XmlReader::setEntityResolver(QXmlEntityResolver* handler)
  456. {
  457. Q_D(XmlReader);
  458. d->entityresolver = handler;
  459. }
  460. QXmlEntityResolver* XmlReader::entityResolver(void) const
  461. {
  462. const XmlReaderPrivate* d = this->d_func();
  463. return d->entityresolver;
  464. }
  465. void XmlReader::setDTDHandler(QXmlDTDHandler* handler)
  466. {
  467. Q_D(XmlReader);
  468. d->dtdhandler = handler;
  469. }
  470. QXmlDTDHandler* XmlReader::DTDHandler(void) const
  471. {
  472. const XmlReaderPrivate* d = this->d_func();
  473. return d->dtdhandler;
  474. }
  475. void XmlReader::setContentHandler(QXmlContentHandler* handler)
  476. {
  477. Q_D(XmlReader);
  478. d->contenthandler = handler;
  479. }
  480. QXmlContentHandler* XmlReader::contentHandler(void) const
  481. {
  482. const XmlReaderPrivate* d = this->d_func();
  483. return d->contenthandler;
  484. }
  485. void XmlReader::setErrorHandler(QXmlErrorHandler* handler)
  486. {
  487. Q_D(XmlReader);
  488. d->errorhandler = handler;
  489. }
  490. QXmlErrorHandler* XmlReader::errorHandler(void) const
  491. {
  492. const XmlReaderPrivate* d = this->d_func();
  493. return d->errorhandler;
  494. }
  495. void XmlReader::setLexicalHandler(QXmlLexicalHandler* handler)
  496. {
  497. Q_D(XmlReader);
  498. d->lexicalhandler = handler;
  499. }
  500. QXmlLexicalHandler* XmlReader::lexicalHandler(void) const
  501. {
  502. const XmlReaderPrivate* d = this->d_func();
  503. return d->lexicalhandler;
  504. }
  505. void XmlReader::setDeclHandler(QXmlDeclHandler* handler)
  506. {
  507. Q_D(XmlReader);
  508. d->declhandler = handler;
  509. }
  510. QXmlDeclHandler* XmlReader::declHandler(void) const
  511. {
  512. const XmlReaderPrivate* d = this->d_func();
  513. return d->declhandler;
  514. }
  515. bool XmlReader::parse(const QXmlInputSource& input)
  516. {
  517. return this->parse(&input);
  518. }
  519. bool XmlReader::parse(const QXmlInputSource* input)
  520. {
  521. Q_D(XmlReader);
  522. if (d->contenthandler) {
  523. d->contenthandler->setDocumentLocator(d->locator.data());
  524. }
  525. d->parse(input);
  526. return true;
  527. }
  528. bool XmlReader::parse(QIODevice& input)
  529. {
  530. Q_D(XmlReader);
  531. if (d->contenthandler) {
  532. d->contenthandler->setDocumentLocator(d->locator.data());
  533. }
  534. d->parse(input);
  535. return true;
  536. }
  537. int XmlReaderLocator::columnNumber(void) const
  538. {
  539. return xmlTextReaderGetParserColumnNumber(this->reader->d_func()->m_reader);
  540. }
  541. int XmlReaderLocator::lineNumber(void) const
  542. {
  543. return xmlTextReaderGetParserLineNumber(this->reader->d_func()->m_reader);
  544. }
  545. } // namespace XML2