fb2xml2.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623
  1. #include "fb2xml2.h"
  2. #include <cstring>
  3. #include <libxml/tree.h>
  4. #include <libxml/parser.h>
  5. #include <libxml/HTMLparser.h>
  6. #include <libxml/xmlreader.h>
  7. namespace XML2 {
  8. //---------------------------------------------------------------------------
  9. // XML2::HtmlReader
  10. //---------------------------------------------------------------------------
  11. class HtmlReaderLocator : public QXmlLocator {
  12. public:
  13. HtmlReaderLocator(HtmlReader* r) : reader(r) {}
  14. virtual int columnNumber(void) const;
  15. virtual int lineNumber(void) const;
  16. private:
  17. HtmlReader* reader;
  18. };
  19. class HtmlReaderPrivate {
  20. public:
  21. ~HtmlReaderPrivate(void) {}
  22. private:
  23. HtmlReaderPrivate(HtmlReader* reader);
  24. static void startDocument(void* c);
  25. static void endDocument(void* c);
  26. static void startElement(void* c, const xmlChar* name, const xmlChar** attrs);
  27. static void endElement(void* c, const xmlChar* name);
  28. static void comment(void* c, const xmlChar* value);
  29. static void cdataBlock(void* c, const xmlChar* value, int len);
  30. static void processingInstruction(void* c, const xmlChar* target, const xmlChar* data);
  31. static void characters(void* c, const xmlChar* ch, int len);
  32. static void ignorableWhitespace(void* c, const xmlChar* ch, int len);
  33. static void internalSubset(void* c, const xmlChar* name, const xmlChar* publicId, const xmlChar* systemId);
  34. static QString C2S(const xmlChar* text, int size = -1);
  35. static QString local(const QString &name);
  36. void parse(const QXmlInputSource* input);
  37. QScopedPointer<HtmlReaderLocator> locator;
  38. Q_DECLARE_PUBLIC(HtmlReader)
  39. HtmlReader* q_ptr;
  40. QXmlEntityResolver* entityresolver;
  41. QXmlDTDHandler* dtdhandler;
  42. QXmlContentHandler* contenthandler;
  43. QXmlErrorHandler* errorhandler;
  44. QXmlLexicalHandler* lexicalhandler;
  45. QXmlDeclHandler* declhandler;
  46. xmlParserCtxt* context;
  47. friend class HtmlReaderLocator;
  48. };
  49. HtmlReaderPrivate::HtmlReaderPrivate(HtmlReader* reader)
  50. : q_ptr(reader), entityresolver(0), dtdhandler(0), contenthandler(0), errorhandler(0), lexicalhandler(0), declhandler(0), context(0)
  51. {
  52. this->locator.reset(new HtmlReaderLocator(reader));
  53. }
  54. QString HtmlReaderPrivate::C2S(const xmlChar* text, int size)
  55. {
  56. return QString::fromLocal8Bit(reinterpret_cast<const char*>(text), size);
  57. }
  58. void HtmlReaderPrivate::parse(const QXmlInputSource* input)
  59. {
  60. htmlSAXHandler handler;
  61. QByteArray arr = input->data().toUtf8();
  62. std::memset(&handler, 0, sizeof(handler));
  63. handler.startDocument = &HtmlReaderPrivate::startDocument;
  64. handler.endDocument = &HtmlReaderPrivate::endDocument;
  65. handler.startElement = &HtmlReaderPrivate::startElement;
  66. handler.endElement = &HtmlReaderPrivate::endElement;
  67. handler.comment = &HtmlReaderPrivate::comment;
  68. handler.cdataBlock = &HtmlReaderPrivate::cdataBlock;
  69. handler.processingInstruction = &HtmlReaderPrivate::processingInstruction;
  70. handler.characters = &HtmlReaderPrivate::characters;
  71. handler.ignorableWhitespace = &HtmlReaderPrivate::ignorableWhitespace;
  72. handler.internalSubset = &HtmlReaderPrivate::internalSubset;
  73. this->context = htmlCreatePushParserCtxt(&handler, this, arr.data(), arr.size(), "", XML_CHAR_ENCODING_UTF8);
  74. htmlParseChunk(this->context, NULL, 0, 1);
  75. htmlFreeParserCtxt(this->context);
  76. xmlCleanupParser();
  77. }
  78. void HtmlReaderPrivate::startDocument(void* c)
  79. {
  80. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  81. if (r->contenthandler) {
  82. r->contenthandler->startDocument();
  83. }
  84. }
  85. void HtmlReaderPrivate::endDocument(void* c)
  86. {
  87. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  88. if (r->contenthandler) {
  89. r->contenthandler->endDocument();
  90. }
  91. }
  92. QString HtmlReaderPrivate::local(const QString &name)
  93. {
  94. return name.mid(name.lastIndexOf(":"));
  95. }
  96. void HtmlReaderPrivate::startElement(void* c, const xmlChar* name, const xmlChar** attrs)
  97. {
  98. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  99. if (r->contenthandler) {
  100. QXmlAttributes a;
  101. if (attrs) {
  102. int i = 0;
  103. while (attrs[i]) {
  104. QString qName = C2S(attrs[i]);
  105. a.append(qName, "", local(qName), C2S(attrs[i+1]));
  106. i += 2;
  107. }
  108. }
  109. QString qName = C2S(name);
  110. r->contenthandler->startElement("", local(qName), qName, a);
  111. }
  112. }
  113. void HtmlReaderPrivate::endElement(void* c, const xmlChar* name)
  114. {
  115. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  116. if (r->contenthandler) {
  117. QString qName = C2S(name);
  118. r->contenthandler->endElement("", local(qName), qName);
  119. }
  120. }
  121. void HtmlReaderPrivate::comment(void* c, const xmlChar* value)
  122. {
  123. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  124. if (r->lexicalhandler) {
  125. r->lexicalhandler->comment(C2S(value));
  126. }
  127. }
  128. void HtmlReaderPrivate::cdataBlock(void* c, const xmlChar* value, int len)
  129. {
  130. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  131. if (r->lexicalhandler) {
  132. r->lexicalhandler->startCDATA();
  133. if (r->contenthandler) {
  134. r->contenthandler->characters(C2S(value, len));
  135. }
  136. r->lexicalhandler->endCDATA();
  137. }
  138. }
  139. void HtmlReaderPrivate::processingInstruction(void* c, const xmlChar* target, const xmlChar* data)
  140. {
  141. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  142. if (r->contenthandler) {
  143. r->contenthandler->processingInstruction(C2S(target), C2S(data));
  144. }
  145. }
  146. void HtmlReaderPrivate::characters(void* c, const xmlChar* ch, int len)
  147. {
  148. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  149. if (r->contenthandler) {
  150. r->contenthandler->characters(C2S(ch, len));
  151. }
  152. }
  153. void HtmlReaderPrivate::ignorableWhitespace(void* c, const xmlChar* ch, int len)
  154. {
  155. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  156. if (r->contenthandler) {
  157. r->contenthandler->ignorableWhitespace(C2S(ch, len));
  158. }
  159. }
  160. void HtmlReaderPrivate::internalSubset(void* c, const xmlChar* name, const xmlChar* publicId, const xmlChar* systemId)
  161. {
  162. HtmlReaderPrivate* r = reinterpret_cast<HtmlReaderPrivate*>(c);
  163. if (r->lexicalhandler) {
  164. r->lexicalhandler->startDTD(C2S(name), C2S(publicId), C2S(systemId));
  165. r->lexicalhandler->endDTD();
  166. }
  167. }
  168. HtmlReader::HtmlReader(void)
  169. : d_ptr(new HtmlReaderPrivate(this))
  170. {
  171. }
  172. HtmlReader::~HtmlReader(void)
  173. {
  174. }
  175. bool HtmlReader::feature(const QString&, bool* ok) const
  176. {
  177. if (ok) {
  178. *ok = false;
  179. }
  180. return false;
  181. }
  182. void HtmlReader::setFeature(const QString&, bool)
  183. {
  184. }
  185. bool HtmlReader::hasFeature(const QString&) const
  186. {
  187. return false;
  188. }
  189. void* HtmlReader::property(const QString&, bool* ok) const
  190. {
  191. if (ok) {
  192. *ok = false;
  193. }
  194. return 0;
  195. }
  196. void HtmlReader::setProperty(const QString&, void*)
  197. {
  198. }
  199. bool HtmlReader::hasProperty(const QString&) const
  200. {
  201. return false;
  202. }
  203. void HtmlReader::setEntityResolver(QXmlEntityResolver* handler)
  204. {
  205. Q_D(HtmlReader);
  206. d->entityresolver = handler;
  207. }
  208. QXmlEntityResolver* HtmlReader::entityResolver(void) const
  209. {
  210. const HtmlReaderPrivate* d = this->d_func();
  211. return d->entityresolver;
  212. }
  213. void HtmlReader::setDTDHandler(QXmlDTDHandler* handler)
  214. {
  215. Q_D(HtmlReader);
  216. d->dtdhandler = handler;
  217. }
  218. QXmlDTDHandler* HtmlReader::DTDHandler(void) const
  219. {
  220. const HtmlReaderPrivate* d = this->d_func();
  221. return d->dtdhandler;
  222. }
  223. void HtmlReader::setContentHandler(QXmlContentHandler* handler)
  224. {
  225. Q_D(HtmlReader);
  226. d->contenthandler = handler;
  227. }
  228. QXmlContentHandler* HtmlReader::contentHandler(void) const
  229. {
  230. const HtmlReaderPrivate* d = this->d_func();
  231. return d->contenthandler;
  232. }
  233. void HtmlReader::setErrorHandler(QXmlErrorHandler* handler)
  234. {
  235. Q_D(HtmlReader);
  236. d->errorhandler = handler;
  237. }
  238. QXmlErrorHandler* HtmlReader::errorHandler(void) const
  239. {
  240. const HtmlReaderPrivate* d = this->d_func();
  241. return d->errorhandler;
  242. }
  243. void HtmlReader::setLexicalHandler(QXmlLexicalHandler* handler)
  244. {
  245. Q_D(HtmlReader);
  246. d->lexicalhandler = handler;
  247. }
  248. QXmlLexicalHandler* HtmlReader::lexicalHandler(void) const
  249. {
  250. const HtmlReaderPrivate* d = this->d_func();
  251. return d->lexicalhandler;
  252. }
  253. void HtmlReader::setDeclHandler(QXmlDeclHandler* handler)
  254. {
  255. Q_D(HtmlReader);
  256. d->declhandler = handler;
  257. }
  258. QXmlDeclHandler* HtmlReader::declHandler(void) const
  259. {
  260. const HtmlReaderPrivate* d = this->d_func();
  261. return d->declhandler;
  262. }
  263. bool HtmlReader::parse(const QXmlInputSource& input)
  264. {
  265. return this->parse(&input);
  266. }
  267. bool HtmlReader::parse(const QXmlInputSource* input)
  268. {
  269. Q_D(HtmlReader);
  270. if (d->contenthandler) {
  271. d->contenthandler->setDocumentLocator(d->locator.data());
  272. }
  273. d->parse(input);
  274. return true;
  275. }
  276. int HtmlReaderLocator::columnNumber(void) const
  277. {
  278. return this->reader->d_func()->context->input->col;
  279. }
  280. int HtmlReaderLocator::lineNumber(void) const
  281. {
  282. return this->reader->d_func()->context->input->line;
  283. }
  //---------------------------------------------------------------------------
  //  XML2::XmlReader
  //---------------------------------------------------------------------------
  287. class XmlReaderLocator : public QXmlLocator {
  288. public:
  289. XmlReaderLocator(XmlReader* r) : reader(r) {}
  290. virtual int columnNumber(void) const;
  291. virtual int lineNumber(void) const;
  292. private:
  293. XmlReader* reader;
  294. };
  295. class XmlReaderPrivate {
  296. public:
  297. ~XmlReaderPrivate(void) {}
  298. private:
  299. XmlReaderPrivate(XmlReader* reader);
  300. static void onError(void *arg, const char *msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator);
  301. static int onRead(void * context, char * buffer, int len);
  302. static QString C2S(const xmlChar* text, int size = -1);
  303. bool parse(const QXmlInputSource* input);
  304. bool parse(QIODevice& input);
  305. void process(xmlTextReaderPtr reader);
  306. QScopedPointer<XmlReaderLocator> locator;
  307. Q_DECLARE_PUBLIC(XmlReader)
  308. XmlReader* q_ptr;
  309. QXmlEntityResolver* entityresolver;
  310. QXmlDTDHandler* dtdhandler;
  311. QXmlContentHandler* contenthandler;
  312. QXmlErrorHandler* errorhandler;
  313. QXmlLexicalHandler* lexicalhandler;
  314. QXmlDeclHandler* declhandler;
  315. xmlTextReaderPtr m_reader;
  316. friend class XmlReaderLocator;
  317. };
  318. XmlReaderPrivate::XmlReaderPrivate(XmlReader* reader)
  319. : q_ptr(reader), entityresolver(0), dtdhandler(0), contenthandler(0), errorhandler(0), lexicalhandler(0), declhandler(0), m_reader(0)
  320. {
  321. this->locator.reset(new XmlReaderLocator(reader));
  322. }
  323. QString XmlReaderPrivate::C2S(const xmlChar* text, int size)
  324. {
  325. return QString::fromLocal8Bit(reinterpret_cast<const char*>(text), size);
  326. }
  327. void XmlReaderPrivate::onError(void * arg, const char * msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator)
  328. {
  329. XmlReaderPrivate* r = reinterpret_cast<XmlReaderPrivate*>(arg);
  330. if (r->errorhandler) {
  331. QXmlParseException e(QString::fromLocal8Bit(msg), xmlTextReaderGetParserColumnNumber(r->m_reader), xmlTextReaderGetParserLineNumber(r->m_reader));
  332. switch (severity) {
  333. case XML_PARSER_SEVERITY_VALIDITY_WARNING: r->errorhandler->warning(e); break;
  334. case XML_PARSER_SEVERITY_VALIDITY_ERROR: r->errorhandler->error(e); break;
  335. case XML_PARSER_SEVERITY_WARNING: r->errorhandler->warning(e); break;
  336. case XML_PARSER_SEVERITY_ERROR: r->errorhandler->error(e); break;
  337. }
  338. }
  339. }
  340. void XmlReaderPrivate::process(xmlTextReaderPtr reader)
  341. {
  342. if (!contenthandler) return;
  343. switch (xmlTextReaderNodeType(reader)) {
  344. case XML_READER_TYPE_ELEMENT: {
  345. QString localName = C2S(xmlTextReaderConstLocalName(reader));
  346. QString qName = C2S(xmlTextReaderConstName(reader));
  347. bool empty = xmlTextReaderIsEmptyElement(reader);
  348. QXmlAttributes atts;
  349. while (xmlTextReaderMoveToNextAttribute(reader)) {
  350. QString localName = C2S(xmlTextReaderConstLocalName(reader));
  351. QString qName = C2S(xmlTextReaderConstName(reader));
  352. QString value = C2S(xmlTextReaderConstValue(reader));
  353. atts.append(qName, "", localName, value);
  354. }
  355. contenthandler->startElement("", localName, qName, atts);
  356. if (empty) contenthandler->endElement("", localName, qName);
  357. } break;
  358. case XML_READER_TYPE_TEXT: {
  359. QString value = C2S(xmlTextReaderConstValue(reader));
  360. contenthandler->characters(value);
  361. } break;
  362. case XML_READER_TYPE_END_ELEMENT: {
  363. QString localName = C2S(xmlTextReaderConstLocalName(reader));
  364. QString qName = C2S(xmlTextReaderConstName(reader));
  365. contenthandler->endElement("", localName, qName);
  366. } break;
  367. }
  368. }
  369. int XmlReaderPrivate::onRead(void * context, char * buffer, int len)
  370. {
  371. QIODevice *device = reinterpret_cast<QIODevice*>(context);
  372. return device->read(buffer, len);
  373. }
  374. bool XmlReaderPrivate::parse(const QXmlInputSource* input)
  375. {
  376. QByteArray arr = input->data().toUtf8();
  377. int options = XML_PARSE_RECOVER | XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET;
  378. m_reader = xmlReaderForMemory(arr.data(), arr.size(), NULL, NULL, options);
  379. if (!m_reader) return false;
  380. xmlTextReaderSetErrorHandler(m_reader, &XmlReaderPrivate::onError, this);
  381. while (xmlTextReaderRead(m_reader) == 1) process(m_reader);
  382. xmlFreeTextReader(m_reader);
  383. return true;
  384. }
  385. bool XmlReaderPrivate::parse(QIODevice& input)
  386. {
  387. int options = XML_PARSE_RECOVER | XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET;
  388. m_reader = xmlReaderForIO(&XmlReaderPrivate::onRead, NULL, &input, NULL, NULL, options);
  389. if (!m_reader) return false;
  390. xmlTextReaderSetErrorHandler(m_reader, &XmlReaderPrivate::onError, this);
  391. while (xmlTextReaderRead(m_reader) == 1) process(m_reader);
  392. xmlFreeTextReader(m_reader);
  393. return true;
  394. }
  395. XmlReader::XmlReader(void)
  396. : d_ptr(new XmlReaderPrivate(this))
  397. {
  398. }
  399. XmlReader::~XmlReader(void)
  400. {
  401. }
  402. bool XmlReader::feature(const QString&, bool* ok) const
  403. {
  404. if (ok) *ok = false;
  405. return false;
  406. }
  407. void XmlReader::setFeature(const QString&, bool)
  408. {
  409. }
  410. bool XmlReader::hasFeature(const QString&) const
  411. {
  412. return false;
  413. }
  414. void* XmlReader::property(const QString&, bool* ok) const
  415. {
  416. if (ok) *ok = false;
  417. return 0;
  418. }
  419. void XmlReader::setProperty(const QString&, void*)
  420. {
  421. }
  422. bool XmlReader::hasProperty(const QString&) const
  423. {
  424. return false;
  425. }
  426. void XmlReader::setEntityResolver(QXmlEntityResolver* handler)
  427. {
  428. Q_D(XmlReader);
  429. d->entityresolver = handler;
  430. }
  431. QXmlEntityResolver* XmlReader::entityResolver(void) const
  432. {
  433. const XmlReaderPrivate* d = this->d_func();
  434. return d->entityresolver;
  435. }
  436. void XmlReader::setDTDHandler(QXmlDTDHandler* handler)
  437. {
  438. Q_D(XmlReader);
  439. d->dtdhandler = handler;
  440. }
  441. QXmlDTDHandler* XmlReader::DTDHandler(void) const
  442. {
  443. const XmlReaderPrivate* d = this->d_func();
  444. return d->dtdhandler;
  445. }
  446. void XmlReader::setContentHandler(QXmlContentHandler* handler)
  447. {
  448. Q_D(XmlReader);
  449. d->contenthandler = handler;
  450. }
  451. QXmlContentHandler* XmlReader::contentHandler(void) const
  452. {
  453. const XmlReaderPrivate* d = this->d_func();
  454. return d->contenthandler;
  455. }
  456. void XmlReader::setErrorHandler(QXmlErrorHandler* handler)
  457. {
  458. Q_D(XmlReader);
  459. d->errorhandler = handler;
  460. }
  461. QXmlErrorHandler* XmlReader::errorHandler(void) const
  462. {
  463. const XmlReaderPrivate* d = this->d_func();
  464. return d->errorhandler;
  465. }
  466. void XmlReader::setLexicalHandler(QXmlLexicalHandler* handler)
  467. {
  468. Q_D(XmlReader);
  469. d->lexicalhandler = handler;
  470. }
  471. QXmlLexicalHandler* XmlReader::lexicalHandler(void) const
  472. {
  473. const XmlReaderPrivate* d = this->d_func();
  474. return d->lexicalhandler;
  475. }
  476. void XmlReader::setDeclHandler(QXmlDeclHandler* handler)
  477. {
  478. Q_D(XmlReader);
  479. d->declhandler = handler;
  480. }
  481. QXmlDeclHandler* XmlReader::declHandler(void) const
  482. {
  483. const XmlReaderPrivate* d = this->d_func();
  484. return d->declhandler;
  485. }
  486. bool XmlReader::parse(const QXmlInputSource& input)
  487. {
  488. return this->parse(&input);
  489. }
  490. bool XmlReader::parse(const QXmlInputSource* input)
  491. {
  492. Q_D(XmlReader);
  493. if (d->contenthandler) {
  494. d->contenthandler->setDocumentLocator(d->locator.data());
  495. }
  496. d->parse(input);
  497. return true;
  498. }
  499. bool XmlReader::parse(QIODevice& input)
  500. {
  501. Q_D(XmlReader);
  502. if (d->contenthandler) {
  503. d->contenthandler->setDocumentLocator(d->locator.data());
  504. }
  505. d->parse(input);
  506. return true;
  507. }
  508. int XmlReaderLocator::columnNumber(void) const
  509. {
  510. return xmlTextReaderGetParserColumnNumber(this->reader->d_func()->m_reader);
  511. }
  512. int XmlReaderLocator::lineNumber(void) const
  513. {
  514. return xmlTextReaderGetParserLineNumber(this->reader->d_func()->m_reader);
  515. }
  516. } // namespace XML2