fb2read.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. #include <QtGui>
  2. #include <QtDebug>
  3. #include "fb2read.h"
  4. //---------------------------------------------------------------------------
  5. // Fb2ReadThread
  6. //---------------------------------------------------------------------------
  7. Fb2ReadThread::Fb2ReadThread(QObject *parent, const QString &filename)
  8. : QThread(parent)
  9. , m_filename(filename)
  10. , m_abort(false)
  11. {
  12. }
  13. Fb2ReadThread::~Fb2ReadThread()
  14. {
  15. stop();
  16. wait();
  17. }
  18. void Fb2ReadThread::stop()
  19. {
  20. QMutexLocker locker(&mutex);
  21. Q_UNUSED(locker);
  22. m_abort = true;
  23. }
  24. void Fb2ReadThread::run()
  25. {
  26. if (parse()) emit html(m_filename, m_html);
  27. }
  28. void Fb2ReadThread::onFile(const QString &name, const QString &path)
  29. {
  30. emit file(name, path);
  31. }
  32. bool Fb2ReadThread::parse()
  33. {
  34. QFile file(m_filename);
  35. if (!file.open(QFile::ReadOnly | QFile::Text)) {
  36. qCritical() << QObject::tr("Cannot read file %1: %2.").arg(m_filename).arg(file.errorString());
  37. return false;
  38. }
  39. Fb2Handler handler(*this);
  40. QXmlSimpleReader reader;
  41. reader.setContentHandler(&handler);
  42. reader.setErrorHandler(&handler);
  43. QXmlInputSource source(&file);
  44. return reader.parse(source);
  45. }
  46. //---------------------------------------------------------------------------
  47. // Fb2HtmlWriter
  48. //---------------------------------------------------------------------------
  49. Fb2HtmlWriter::Fb2HtmlWriter(Fb2ReadThread &thread)
  50. : QXmlStreamWriter(thread.data())
  51. , m_thread(thread)
  52. , m_id(0)
  53. {
  54. }
  55. QString Fb2HtmlWriter::addFile(const QString &name, const QByteArray &data)
  56. {
  57. QString path = getFile(name);
  58. QFile file(path);
  59. if (file.open(QIODevice::WriteOnly)) {
  60. file.write(data);
  61. m_thread.onFile(name, path);
  62. }
  63. return path;
  64. }
  65. QString Fb2HtmlWriter::getFile(const QString &name)
  66. {
  67. StringHash::const_iterator i = m_hash.find(name);
  68. if (i == m_hash.end()) {
  69. QTemporaryFile file;
  70. file.setAutoRemove(false);
  71. file.open();
  72. return m_hash.insert(name, file.fileName()).value();
  73. } else {
  74. return i.value();
  75. }
  76. }
  77. QString Fb2HtmlWriter::newId()
  78. {
  79. return QString("FB2E%1").arg(++m_id);
  80. }
  81. //---------------------------------------------------------------------------
  82. // Fb2Handler::BaseHandler
  83. //---------------------------------------------------------------------------
  // Helper macros for the per-handler keyword tables.
  //  - FB2_BEGIN_KEYHASH(x) defines Fb2Handler::x::toKeyword(), which looks the
  //    name up in a function-local static KeywordHash and yields None when the
  //    name is absent; it then opens the body of the KeywordHash constructor.
  //  - FB2_END_KEYHASH closes that constructor body.
  //  - FB2_KEY(key,str) registers one entry; it supplies its own semicolon.
  #define FB2_BEGIN_KEYHASH(x) \
  Fb2Handler::x::Keyword Fb2Handler::x::toKeyword(const QString &name) \
  { \
      static const KeywordHash keywords; \
      const KeywordHash::const_iterator hit = keywords.find(name); \
      if (hit == keywords.end()) return None; \
      return hit.value(); \
  } \
  Fb2Handler::x::KeywordHash::KeywordHash() {
  #define FB2_END_KEYHASH }
  #define FB2_KEY(key,str) insert(str,key);
  94. static QString Value(const QXmlAttributes &attributes, const QString &name)
  95. {
  96. int count = attributes.count();
  97. for (int i = 0; i < count; i++ ) {
  98. if (attributes.localName(i).compare(name, Qt::CaseInsensitive) == 0) {
  99. return attributes.value(i);
  100. }
  101. }
  102. return QString();
  103. }
  104. Fb2Handler::BaseHandler::~BaseHandler()
  105. {
  106. if (m_handler) delete m_handler;
  107. }
  108. bool Fb2Handler::BaseHandler::doStart(const QString &name, const QXmlAttributes &attributes)
  109. {
  110. if (m_handler) return m_handler->doStart(name, attributes);
  111. m_handler = NewTag(name, attributes); if (m_handler) return true;
  112. // qCritical() << QObject::tr("Unknown XML child tag: <%1> <%2>").arg(m_name).arg(name);
  113. m_handler = new BaseHandler(m_writer, name);
  114. return true;
  115. }
  116. bool Fb2Handler::BaseHandler::doText(const QString &text)
  117. {
  118. if (m_handler) m_handler->doText(text); else TxtTag(text);
  119. return true;
  120. }
  121. bool Fb2Handler::BaseHandler::doEnd(const QString &name, bool & exists)
  122. {
  123. if (m_handler) {
  124. bool found = exists || name == m_name;
  125. m_handler->doEnd(name, found);
  126. if (m_handler->m_closed) { delete m_handler; m_handler = NULL; }
  127. if (found) { exists = true; return true; }
  128. }
  129. bool found = name == m_name;
  130. if (!found) qCritical() << QObject::tr("Conglict XML tags: <%1> - </%2>").arg(m_name).arg(name);
  131. m_closed = found || exists;
  132. if (m_closed) EndTag(m_name);
  133. exists = found;
  134. return true;
  135. }
  136. //---------------------------------------------------------------------------
  137. // Fb2Handler::RootHandler
  138. //---------------------------------------------------------------------------
  139. FB2_BEGIN_KEYHASH(RootHandler)
  140. insert("stylesheet", Style);
  141. insert("description", Descr);
  142. insert("body", Body);
  143. insert("binary", Binary);
  144. FB2_END_KEYHASH
  145. Fb2Handler::RootHandler::RootHandler(Fb2HtmlWriter &writer, const QString &name)
  146. : BaseHandler(writer, name)
  147. {
  148. m_writer.writeStartElement("html");
  149. m_writer.writeStartElement("body");
  150. }
  151. Fb2Handler::BaseHandler * Fb2Handler::RootHandler::NewTag(const QString &name, const QXmlAttributes &attributes)
  152. {
  153. switch (toKeyword(name)) {
  154. case Body : return new BodyHandler(m_writer, name, attributes, "div", name);
  155. case Descr : return new DescrHandler(m_writer, name);
  156. case Binary : return new BinaryHandler(m_writer, name, attributes);
  157. default: return NULL;
  158. }
  159. }
  160. void Fb2Handler::RootHandler::EndTag(const QString &name)
  161. {
  162. Q_UNUSED(name);
  163. m_writer.writeEndElement();
  164. m_writer.writeEndElement();
  165. }
  166. //---------------------------------------------------------------------------
  167. // Fb2Handler::HeadHandler
  168. //---------------------------------------------------------------------------
  169. Fb2Handler::HeadHandler::HeadHandler(Fb2HtmlWriter &writer, const QString &name, bool hide)
  170. : BaseHandler(writer, name)
  171. {
  172. m_writer.writeStartElement("div");
  173. m_writer.writeAttribute("class", name);
  174. if (hide) m_writer.writeAttribute("style", "display:none");
  175. }
  176. Fb2Handler::BaseHandler * Fb2Handler::HeadHandler::NewTag(const QString &name, const QXmlAttributes &attributes)
  177. {
  178. Q_UNUSED(attributes);
  179. return new HeadHandler(m_writer, name);
  180. }
  181. void Fb2Handler::HeadHandler::TxtTag(const QString &text)
  182. {
  183. m_writer.writeCharacters(text);
  184. }
  185. void Fb2Handler::HeadHandler::EndTag(const QString &name)
  186. {
  187. Q_UNUSED(name);
  188. m_writer.writeCharacters(" ");
  189. m_writer.writeEndElement();
  190. }
  191. //---------------------------------------------------------------------------
  192. // Fb2Handler::DescrHandler
  193. //---------------------------------------------------------------------------
  194. FB2_BEGIN_KEYHASH(DescrHandler)
  195. insert( "title-info" , Title );
  196. insert( "document-info" , Document );
  197. insert( "publish-info" , Publish );
  198. insert( "custom-info" , Custom );
  199. FB2_END_KEYHASH
  200. Fb2Handler::DescrHandler::DescrHandler(Fb2HtmlWriter &writer, const QString &name)
  201. : HeadHandler(writer, name)
  202. {
  203. m_writer.writeAttribute("id", m_writer.newId());
  204. }
  205. Fb2Handler::BaseHandler * Fb2Handler::DescrHandler::NewTag(const QString &name, const QXmlAttributes &attributes)
  206. {
  207. Q_UNUSED(attributes);
  208. switch (toKeyword(name)) {
  209. case Title :
  210. return new TitleHandler(m_writer, name);
  211. case Document :
  212. case Publish :
  213. case Custom :
  214. return new HeadHandler(m_writer, name, true);
  215. default:
  216. return NULL;
  217. }
  218. }
  219. //---------------------------------------------------------------------------
  220. // Fb2Handler::TitleHandler
  221. //---------------------------------------------------------------------------
  222. Fb2Handler::TitleHandler::TitleHandler(Fb2HtmlWriter &writer, const QString &name)
  223. : HeadHandler(writer, name)
  224. {
  225. m_writer.writeAttribute("id", m_writer.newId());
  226. }
  227. Fb2Handler::BaseHandler * Fb2Handler::TitleHandler::NewTag(const QString &name, const QXmlAttributes &attributes)
  228. {
  229. if (name == "annotation") return new BodyHandler(m_writer, name, attributes, "div", name);
  230. return new HeadHandler(m_writer, name, true);
  231. }
  232. //---------------------------------------------------------------------------
  233. // Fb2Handler::BodyHandler
  234. //---------------------------------------------------------------------------
  235. FB2_BEGIN_KEYHASH(BodyHandler)
  236. FB2_KEY( Section, "annotation" );
  237. FB2_KEY( Section, "author" );
  238. FB2_KEY( Section, "cite" );
  239. FB2_KEY( Section, "date" );
  240. FB2_KEY( Section, "epigraph" );
  241. FB2_KEY( Section, "poem" );
  242. FB2_KEY( Section, "section" );
  243. FB2_KEY( Section, "stanza" );
  244. FB2_KEY( Section, "subtitle" );
  245. FB2_KEY( Section, "title" );
  246. FB2_KEY( Anchor, "a" );
  247. FB2_KEY( Table, "table" );
  248. FB2_KEY( Image, "image" );
  249. FB2_KEY( Parag, "empty-line" );
  250. FB2_KEY( Parag, "p" );
  251. FB2_KEY( Parag, "v" );
  252. FB2_KEY( Style, "style" );
  253. FB2_KEY( Strong, "strong" );
  254. FB2_KEY( Emphas, "emphasis" );
  255. FB2_KEY( Strike, "strikethrough" );
  256. FB2_KEY( Sub, "sub" );
  257. FB2_KEY( Sup, "sup" );
  258. FB2_KEY( Code, "code" );
  259. FB2_END_KEYHASH
  260. Fb2Handler::BodyHandler::BodyHandler(Fb2HtmlWriter &writer, const QString &name, const QXmlAttributes &attributes, const QString &tag, const QString &style)
  261. : BaseHandler(writer, name)
  262. , m_parent(NULL)
  263. , m_tag(tag)
  264. , m_style(style)
  265. {
  266. Init(attributes);
  267. }
  268. Fb2Handler::BodyHandler::BodyHandler(BodyHandler *parent, const QString &name, const QXmlAttributes &attributes, const QString &tag, const QString &style)
  269. : BaseHandler(parent->m_writer, name)
  270. , m_parent(parent)
  271. , m_tag(tag)
  272. , m_style(style)
  273. {
  274. Init(attributes);
  275. }
  276. void Fb2Handler::BodyHandler::Init(const QXmlAttributes &attributes)
  277. {
  278. if (m_tag.isEmpty()) return;
  279. m_writer.writeStartElement(m_tag);
  280. QString id = Value(attributes, "id");
  281. if (!id.isEmpty()) {
  282. if (m_style == "section" && isNotes()) m_style = "note";
  283. m_writer.writeAttribute("id", id);
  284. } else if (m_tag == "div" || m_tag == "img") {
  285. m_writer.writeAttribute("id", m_writer.newId());
  286. }
  287. if (!m_style.isEmpty()) {
  288. if (m_style == "body" && Value(attributes, "name").toLower() == "notes") m_style = "notes";
  289. m_writer.writeAttribute("class", m_style);
  290. }
  291. }
  292. Fb2Handler::BaseHandler * Fb2Handler::BodyHandler::NewTag(const QString &name, const QXmlAttributes &attributes)
  293. {
  294. QString tag, style;
  295. switch (toKeyword(name)) {
  296. case Anchor : return new AnchorHandler(this, name, attributes);
  297. case Image : return new ImageHandler(this, name, attributes);
  298. case Section : tag = "div"; style = name; break;
  299. case Parag : tag = "p"; break;
  300. case Strong : tag = "b"; break;
  301. case Emphas : tag = "i"; break;
  302. case Strike : tag = "s"; break;
  303. case Code : tag = "tt"; break;
  304. case Sub : tag = "sub"; break;
  305. case Sup : tag = "sup"; break;
  306. }
  307. return new BodyHandler(this, name, attributes, tag, style);
  308. }
  309. void Fb2Handler::BodyHandler::TxtTag(const QString &text)
  310. {
  311. m_writer.writeCharacters(text);
  312. }
  313. void Fb2Handler::BodyHandler::EndTag(const QString &name)
  314. {
  315. Q_UNUSED(name);
  316. if (m_tag.isEmpty()) return;
  317. if (m_tag == "div") m_writer.writeCharacters(" ");
  318. m_writer.writeEndElement();
  319. }
  320. bool Fb2Handler::BodyHandler::isNotes() const
  321. {
  322. if (m_style == "notes") return true;
  323. return m_parent ? m_parent->isNotes() : false;
  324. }
  325. //---------------------------------------------------------------------------
  326. // Fb2Handler::AnchorHandler
  327. //---------------------------------------------------------------------------
  328. Fb2Handler::AnchorHandler::AnchorHandler(BodyHandler *parent, const QString &name, const QXmlAttributes &attributes)
  329. : BodyHandler(parent, name, attributes, "a")
  330. {
  331. QString href = Value(attributes, "href");
  332. m_writer.writeAttribute("href", href);
  333. }
  334. //---------------------------------------------------------------------------
  335. // Fb2Handler::ImageHandler
  336. //---------------------------------------------------------------------------
  337. Fb2Handler::ImageHandler::ImageHandler(BodyHandler *parent, const QString &name, const QXmlAttributes &attributes)
  338. : BodyHandler(parent, name, attributes, "img")
  339. {
  340. QString href = Value(attributes, "href");
  341. while (href.left(1) == "#") href.remove(0, 1);
  342. QString path = m_writer.getFile(href);
  343. m_writer.writeAttribute("src", path);
  344. m_writer.writeAttribute("alt", href);
  345. }
  346. //---------------------------------------------------------------------------
  347. // Fb2Handler::BinaryHandler
  348. //---------------------------------------------------------------------------
  349. Fb2Handler::BinaryHandler::BinaryHandler(Fb2HtmlWriter &writer, const QString &name, const QXmlAttributes &attributes)
  350. : BaseHandler(writer, name)
  351. , m_file(Value(attributes, "id"))
  352. {
  353. }
  354. void Fb2Handler::BinaryHandler::TxtTag(const QString &text)
  355. {
  356. m_text += text;
  357. }
  358. void Fb2Handler::BinaryHandler::EndTag(const QString &name)
  359. {
  360. Q_UNUSED(name);
  361. QByteArray in; in.append(m_text);
  362. if (!m_file.isEmpty()) m_writer.addFile(m_file, QByteArray::fromBase64(in));
  363. }
  364. //---------------------------------------------------------------------------
  365. // Fb2Handler
  366. //---------------------------------------------------------------------------
  367. Fb2Handler::Fb2Handler(Fb2ReadThread &thread)
  368. : QXmlDefaultHandler()
  369. , m_writer(thread)
  370. , m_handler(NULL)
  371. {
  372. m_writer.setAutoFormatting(true);
  373. }
  374. Fb2Handler::~Fb2Handler()
  375. {
  376. if (m_handler) delete m_handler;
  377. }
  378. bool Fb2Handler::startElement(const QString & namespaceURI, const QString & localName, const QString &qName, const QXmlAttributes &attributes)
  379. {
  380. Q_UNUSED(namespaceURI);
  381. Q_UNUSED(localName);
  382. const QString name = qName.toLower();
  383. if (m_handler) return m_handler->doStart(name, attributes);
  384. qCritical() << name;
  385. if (name == "fictionbook") {
  386. m_handler = new RootHandler(m_writer, name);
  387. return true;
  388. } else {
  389. m_error = QObject::tr("The file is not an FB2 file.");
  390. return false;
  391. }
  392. }
  393. static bool isWhiteSpace(const QString &str)
  394. {
  395. return str.simplified().isEmpty();
  396. }
  397. bool Fb2Handler::characters(const QString &str)
  398. {
  399. QString s = str.simplified();
  400. if (s.isEmpty()) return true;
  401. if (isWhiteSpace(str.left(1))) s.prepend(" ");
  402. if (isWhiteSpace(str.right(1))) s.append(" ");
  403. return m_handler && m_handler->doText(s);
  404. }
  405. bool Fb2Handler::endElement(const QString & namespaceURI, const QString & localName, const QString &qName)
  406. {
  407. Q_UNUSED(namespaceURI);
  408. Q_UNUSED(localName);
  409. bool found = false;
  410. return m_handler && m_handler->doEnd(qName.toLower(), found);
  411. }
  412. bool Fb2Handler::fatalError(const QXmlParseException &exception)
  413. {
  414. qCritical() << QObject::tr("Parse error at line %1, column %2: %3")
  415. .arg(exception.lineNumber())
  416. .arg(exception.columnNumber())
  417. .arg(exception.message());
  418. return false;
  419. }
  420. QString Fb2Handler::errorString() const
  421. {
  422. return m_error;
  423. }
  // Remove the keyword-table helper macros now that all tables are defined.
  #undef FB2_KEY
  #undef FB2_END_KEYHASH
  #undef FB2_BEGIN_KEYHASH