abiword-dev Mailing List Archive: Re: volodymyr - r29899

From: Dominic Lachowicz <domlachowicz_at_gmail.com>
Date: Sat Jul 02 2011 - 18:02:33 CEST

Do you really need X11 in an importer? (The commit below adds `#include <X11/X.h>` to ie_imp_EPUB.cpp.)

On Sat, Jul 2, 2011 at 10:35 AM, <cvs@abisource.com> wrote:
>
> Author: volodymyr
> Date: 2011-07-02 16:35:43 +0200 (Sat, 02 Jul 2011)
> New Revision: 29899
>
> Modified:
> abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.cpp
> abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.h
> abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.cpp
> abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.h
> Log:
> The EPUB import plugin can now import EPUB files. Containers that have several OPS XHTML files are also supported.
>
>
> Modified: abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.cpp
> ===================================================================
> --- abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.cpp 2011-07-02 13:26:48 UTC (rev 29898)
> +++ abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.cpp 2011-07-02 14:35:43 UTC (rev 29899)
> @@ -18,6 +18,10 @@
> * 02111-1307, USA.
> */
>
> +#include <X11/X.h>
> +#include <stdexcept>
> +#include <zlib.h>
> +
> #include "ie_imp_EPUB.h"
>
> IE_Imp_EPUB::IE_Imp_EPUB(PD_Document* pDocument) : IE_Imp(pDocument)
> @@ -37,6 +41,353 @@
>
> UT_Error IE_Imp_EPUB::_loadFile(GsfInput* input)
> {
> + m_epub = gsf_infile_zip_new(input, NULL);
>
> + if (m_epub == NULL)
> + {
> + UT_DEBUGMSG(("Can`t create gsf input zip object\n"));
> + return UT_ERROR;
> + }
> +
> +
> + UT_DEBUGMSG(("Reading metadata\n"));
> + if (readMetadata() != UT_OK)
> + {
> + UT_DEBUGMSG(("Failed to read metadata\n"));
> + return UT_ERROR;
> + }
> +
> + UT_DEBUGMSG(("Reading package information\n"));
> + if (readPackage() != UT_OK)
> + {
> + UT_DEBUGMSG(("Failed to read package information\n"));
> + return UT_ERROR;
> + }
> +
> + UT_DEBUGMSG(("Uncompressing OPS data\n"));
> + if (uncompress() != UT_OK)
> + {
> + UT_DEBUGMSG(("Failed to uncompress data\n"));
> + return UT_ERROR;
> + }
> +
> + UT_DEBUGMSG(("Reading OPS data\n"));
> + if (readStructure() != UT_OK)
> + {
> + UT_DEBUGMSG(("Failed to read OPS data\n"));
> + return UT_ERROR;
> + }
> +
> + return UT_OK;
> +
> }
>
> +UT_Error IE_Imp_EPUB::readMetadata()
> +{
> + GsfInput* metaInf = gsf_infile_child_by_name(m_epub, "META-INF");
> +
> + if (metaInf == NULL)
> + {
> + UT_DEBUGMSG(("Can`t open container META-INF dir\n"));
> + return UT_ERROR;
> + }
> +
> + GsfInput* meta = gsf_infile_child_by_name(GSF_INFILE(metaInf), "container.xml");
> +
> + if (meta == NULL)
> + {
> + UT_DEBUGMSG(("Can`t open container metadata\n"));
> + return UT_ERROR;
> + }
> +
> + size_t metaSize = gsf_input_size(meta);
> +
> + if (metaSize == 0)
> + {
> + UT_DEBUGMSG(("Container metadata file is empty\n"));
> + return UT_ERROR;
> + }
> +
> + gchar* metaXml = (gchar*)gsf_input_read(meta, metaSize, NULL);
> +
> +
> + UT_UTF8String rootfilePath;
> + UT_XML metaParser;
> + ContainerListener containerListener;
> + metaParser.setListener(&containerListener);
> +
> + if (metaParser.sniff(metaXml, metaSize, "container"))
> + {
> + UT_DEBUGMSG(("Parsing container.xml file\n"));
> + metaParser.parse(metaXml, metaSize);
> + } else
> + {
> + UT_DEBUGMSG(("Incorrect container.xml file\n"));
> + return UT_ERROR;
> + }
> +
> + m_rootfilePath = containerListener.getRootFilePath();
> +
> + g_object_unref(G_OBJECT(meta));
> + g_object_unref(G_OBJECT(metaInf));
> +
> + return UT_OK;
> +}
> +
> +UT_Error IE_Imp_EPUB::readPackage()
> +{
> + gchar **aname = g_strsplit(m_rootfilePath.utf8_str(), G_DIR_SEPARATOR_S, 0);
> + GsfInput* opf = gsf_infile_child_by_aname(m_epub, (const char**)aname);
> +
> + UT_DEBUGMSG(("Getting parent\n"));
> + GsfInfile* opfParent = gsf_input_container(opf);
> + m_opsDir = UT_UTF8String(gsf_input_name(GSF_INPUT(opfParent)));
> +
> + UT_DEBUGMSG(("OPS dir: %s\n", m_opsDir.utf8_str()));
> +
> + if (opf == NULL){
> + UT_DEBUGMSG(("Can`t open .opf file\n"));
> + return UT_ERROR;
> + }
> +
> + size_t opfSize = gsf_input_size(opf);
> + gchar* opfXml = (gchar*)gsf_input_read(opf, opfSize, NULL);
> +
> + UT_XML opfParser;
> + OpfListener opfListener;
> + opfParser.setListener(&opfListener);
> + if (opfParser.sniff(opfXml, opfSize, "package"))
> + {
> + UT_DEBUGMSG(("Parsing opf file\n"));
> + opfParser.parse(opfXml, opfSize);
> + } else
> + {
> + UT_DEBUGMSG(("Incorrect opf file found \n"));
> + return UT_ERROR;
> + }
> +
> + g_strfreev(aname);
> + g_object_unref(G_OBJECT(opf));
> + //g_object_unref(G_OBJECT(opfParent));
> +
> + m_spine = opfListener.getSpine();
> + m_manifestItems = opfListener.getManifestItems();
> +
> + return UT_OK;
> +}
> +
> +UT_Error IE_Imp_EPUB::uncompress()
> +{
> + m_tmpDir = UT_go_filename_to_uri(g_get_tmp_dir());
> + m_tmpDir += G_DIR_SEPARATOR_S;
> + m_tmpDir += getDoc()->getDocUUIDString();
> +
> + if (!UT_go_directory_create(m_tmpDir.utf8_str(), 0644, NULL))
> + {
> + UT_DEBUGMSG(("Can`t create temporary directory\n"));
> + return UT_ERROR;
> + }
> + GsfInput *opsDirInput = gsf_infile_child_by_name(m_epub, m_opsDir.utf8_str());
> + UT_DEBUGMSG(("Child count : %d", gsf_infile_num_children(m_epub)));
> + if (opsDirInput == NULL)
> + {
> + UT_DEBUGMSG(("Failed to open OPS dir\n"));
> + return UT_ERROR;
> + }
> +
> + for(std::map<UT_UTF8String, UT_UTF8String>::iterator i = m_manifestItems.begin(); i != m_manifestItems.end(); i++)
> + {
> + gchar *itemFileName = UT_go_filename_from_uri((m_tmpDir + G_DIR_SEPARATOR_S + (*i).second).utf8_str());
> + gchar** aname = g_strsplit((*i).second.utf8_str(), G_DIR_SEPARATOR_S, 0);
> +
> +
> + GsfInput* itemInput = gsf_infile_child_by_aname(GSF_INFILE(opsDirInput), (const char**)aname);
> + GsfOutput* itemOutput = createFileByPath(itemFileName);
> + gsf_input_seek(itemInput, 0, G_SEEK_SET);
> + gsf_input_copy(itemInput, itemOutput);
> + g_strfreev(aname);
> + g_free(itemFileName);
> + g_object_unref(G_OBJECT(itemInput));
> + gsf_output_close(itemOutput);
> + }
> +
> + g_object_unref(G_OBJECT(opsDirInput));
> +
> +
> + return UT_OK;
> +}
> +
> +UT_Error IE_Imp_EPUB::readStructure()
> +{
> + getDoc()->createRawDocument();
> + getDoc()->finishRawCreation();
> +
> + for(std::vector<UT_UTF8String>::iterator i = m_spine.begin(); i != m_spine.end(); i++)
> + {
> + try
> + {
> +
> + UT_UTF8String itemPath = m_tmpDir + G_DIR_SEPARATOR_S + m_manifestItems.at(*i);
> + PT_DocPosition posEnd = 0;
> + getDoc()->getBounds(true, posEnd);
> +
> + GsfInput* itemInput = UT_go_file_open(itemPath.utf8_str(), NULL);
> + size_t inputSize = gsf_input_size(itemInput);
> + gchar* inputData = (gchar*)gsf_input_read(itemInput, inputSize, NULL);
> +
> + PD_Document *currentDoc = new PD_Document();
> + currentDoc->createRawDocument();
> + const char *suffix = strchr(itemPath.utf8_str(), '.');
> + currentDoc->importFile(itemPath.utf8_str(),
> + IE_Imp::fileTypeForSuffix(suffix), true, false, NULL);
> + currentDoc->finishRawCreation();
> +
> + IE_Imp_PasteListener * pPasteListener = new IE_Imp_PasteListener(getDoc(),posEnd, currentDoc);
> + currentDoc->tellListener(static_cast<PL_Listener *>(pPasteListener));
> +
> + DELETEP(pPasteListener);
> + UNREFP(currentDoc);
> + g_object_unref(G_OBJECT(itemInput));
> +
> + } catch (std::out_of_range e)
> + {
> + return UT_ERROR;
> + }
> + }
> +
> + return UT_OK;
> +}
> +
> +GsfOutput* IE_Imp_EPUB::createFileByPath(const char* path)
> +{
> + gchar** components = g_strsplit(path, G_DIR_SEPARATOR_S, 0);
> + UT_UTF8String curPath = UT_UTF8String(components[0]);
> +
> + int current = 0;
> + GsfOutput* output = NULL;
> + while (components[current] != NULL)
> + {
> + curPath += components[current];
> + current++;
> +
> + char *uri = UT_go_filename_to_uri(curPath.utf8_str());
> + bool fileExists = UT_go_file_exists(uri);
> + if (!fileExists && (components[current] != NULL))
> + {
> + UT_go_directory_create(uri, 0644, NULL);
> + } else
> + {
> + if (!fileExists)
> + {
> + output = UT_go_file_create(uri, NULL);
> + break;
> + }
> + }
> +
> + g_free(uri);
> +
> + if (components[current] != NULL)
> + {
> + curPath += G_DIR_SEPARATOR_S;
> + }
> + }
> +
> + g_strfreev(components);
> + return output;
> +}
> +
> +void ContainerListener::startElement(const gchar* name, const gchar** atts)
> +{
> + if (!UT_go_utf8_collate_casefold(name, "rootfile"))
> + {
> + m_rootFilePath = UT_UTF8String(UT_getAttribute("full-path", atts));
> + UT_DEBUGMSG(("Found rootfile%s\n", m_rootFilePath.utf8_str()));
> + }
> +}
> +
> +void ContainerListener::endElement(const gchar* name)
> +{
> +}
> +
> +void ContainerListener::charData(const gchar* buffer, int length)
> +{
> +
> +}
> +
> +UT_UTF8String ContainerListener::getRootFilePath() const
> +{
> + return m_rootFilePath;
> +}
> +
> +/*
> +
> + */
> +
> +OpfListener::OpfListener():
> + m_inManifest(false)
> +{
> +
> +}
> +
> +void OpfListener::startElement(const gchar* name, const gchar** atts)
> +{
> + if (!UT_go_utf8_collate_casefold(name, "manifest"))
> + {
> + m_inManifest = true;
> + }
> +
> + if (!UT_go_utf8_collate_casefold(name, "spine"))
> + {
> + m_inSpine = true;
> + }
> +
> + if (m_inManifest)
> + {
> + if (!UT_go_utf8_collate_casefold(name, "item"))
> + {
> + m_manifestItems.insert(string_pair(UT_UTF8String(UT_getAttribute("id", atts)),
> + UT_UTF8String(UT_getAttribute("href", atts))));
> + UT_DEBUGMSG(("Found manifest item: %s\n", UT_getAttribute("href", atts)));
> + }
> + }
> +
> + if (m_inSpine)
> + {
> + if (!UT_go_utf8_collate_casefold(name, "itemref"))
> + {
> + // We can ignore "linear" attribute as it said in specification
> + m_spine.push_back(UT_UTF8String(UT_getAttribute("idref", atts)));
> + UT_DEBUGMSG(("Found spine itemref: %s\n", UT_getAttribute("idref", atts)));
> + }
> + }
> +
> +}
> +
> +void OpfListener::endElement(const gchar* name)
> +{
> +
> +}
> +
> +void OpfListener::charData(const gchar* buffer, int length)
> +{
> +
> +}
> +
> +/*
> +
> + */
> +
> +void NavigationListener::startElement(const gchar* name, const gchar** atts)
> +{
> +
> +}
> +
> +void NavigationListener::endElement(const gchar* name)
> +{
> +
> +}
> +
> +void NavigationListener::charData(const gchar* buffer, int length)
> +{
> +
> +}
> \ No newline at end of file
>
> Modified: abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.h
> ===================================================================
> --- abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.h 2011-07-02 13:26:48 UTC (rev 29898)
> +++ abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.h 2011-07-02 14:35:43 UTC (rev 29899)
> @@ -21,28 +21,108 @@
> #ifndef IE_IMP_EPUB_H_
> #define IE_IMP_EPUB_H_
>
> -#include "ie_imp.h"
> +#include <gsf/gsf-infile-zip.h>
> +#include <gsf/gsf-infile.h>
> +#include <gsf/gsf-libxml.h>
> +#include <ut_go_file.h>
> +#include <vector>
> +#include <map>
>
> +
> +// AbiWord includes
> +#include <ie_imp.h>
> +#include <ie_imp_XHTML.h>
> +#include <ut_xml.h>
> +#include <ie_imp_PasteListener.h>
> +
> #define EPUB_MIMETYPE "application/epub+zip"
>
>
> +typedef std::pair<UT_UTF8String, UT_UTF8String> string_pair;
> /**
> * Class used to import EPUB files
> */
> class IE_Imp_EPUB : public IE_Imp
> {
> public:
> -
> IE_Imp_EPUB (PD_Document * pDocument);
> virtual ~IE_Imp_EPUB ();
> - virtual bool pasteFromBuffer(PD_DocumentRange * pDocRange,
> + virtual bool pasteFromBuffer(PD_DocumentRange * pDocRange,
> const unsigned char * pData,
> UT_uint32 lenData,
> const char * szEncoding = 0);
> -
> protected:
> - virtual UT_Error _loadFile(GsfInput * input);
> + virtual UT_Error _loadFile(GsfInput * input);
> +
> +private:
> + GsfInfile* m_epub;
> + UT_UTF8String m_rootfilePath;
> + UT_UTF8String m_tmpDir;
> + UT_UTF8String m_opsDir;
> + std::vector<UT_UTF8String> m_spine;
> + std::map<UT_UTF8String, UT_UTF8String> m_manifestItems;
> +
> + UT_Error readMetadata();
> + UT_Error readPackage();
> + UT_Error uncompress();
> + UT_Error readStructure();
> + static GsfOutput* createFileByPath(const char* path);
> };
>
> +/*
> + * Listener for parsing container.xml data
> + */
> +class ContainerListener : public UT_XML::Listener
> +{
> +public:
> + void startElement (const gchar * name, const gchar ** atts);
> + void endElement (const gchar * name);
> + void charData (const gchar * buffer, int length);
> +
> + UT_UTF8String getRootFilePath() const;
> +
> +private:
> + UT_UTF8String m_rootFilePath;
> +};
> +
> +/*
> + * Listener for parsing .opf
> + */
> +class OpfListener : public UT_XML::Listener
> +{
> +public:
> + void startElement (const gchar * name, const gchar ** atts);
> + void endElement (const gchar * name);
> + void charData (const gchar * buffer, int length);
> +
> + std::map<UT_UTF8String, UT_UTF8String> getManifestItems() const { return m_manifestItems; }
> + std::vector<UT_UTF8String> getSpine() const { return m_spine; }
> +
> + OpfListener();
> +
> +private:
> + /* Vector with list of OPS files needed to be imported. Sorted in the linear
> + * reading order
> + */
> + std::vector<UT_UTF8String> m_spine;
> + /* Map with all files that will be used for import
> + */
> + std::map<UT_UTF8String, UT_UTF8String> m_manifestItems;
> +
> + bool m_inManifest;
> + bool m_inSpine;
> +};
> +
> +/*
> + * Listener for parsing .ncx
> + */
> +class NavigationListener : public UT_XML::Listener
> +{
> +public:
> + void startElement (const gchar * name, const gchar ** atts);
> + void endElement (const gchar * name);
> + void charData (const gchar * buffer, int length);
> +};
> +
> #endif
>
>
> Modified: abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.cpp
> ===================================================================
> --- abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.cpp 2011-07-02 13:26:48 UTC (rev 29898)
> +++ abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.cpp 2011-07-02 14:35:43 UTC (rev 29899)
> @@ -39,7 +39,7 @@
> IE_Imp_EPUB_Sniffer::IE_Imp_EPUB_Sniffer() :
> IE_ImpSniffer("EPUB::EPUB")
> {
> -
> + UT_DEBUGMSG(("Constructing sniffer\n"));
> }
>
> IE_Imp_EPUB_Sniffer::~IE_Imp_EPUB_Sniffer()
> @@ -49,16 +49,19 @@
>
> const IE_SuffixConfidence * IE_Imp_EPUB_Sniffer::getSuffixConfidence()
> {
> + UT_DEBUGMSG(("Recognizing suffixes\n"));
> return IE_Imp_EPUB_Sniffer_SuffixConfidence;
> }
>
> const IE_MimeConfidence * IE_Imp_EPUB_Sniffer::getMimeConfidence()
> {
> - return IE_Imp_EPUB_Sniffer_MimeConfidence;
> + UT_DEBUGMSG(("Recognizing mime type\n"));
> + return IE_Imp_EPUB_Sniffer_MimeConfidence;
> }
>
> UT_Confidence_t IE_Imp_EPUB_Sniffer::recognizeContents(GsfInput * input)
> {
> + UT_DEBUGMSG(("Recognizing contents\n"));
> GsfInfile* zip = gsf_infile_zip_new(input, NULL);
> UT_Confidence_t confidence = UT_CONFIDENCE_ZILCH;
> if (zip != NULL)
> @@ -67,19 +70,27 @@
>
> if (mimetype != NULL)
> {
> - gsf_off_t size = gsf_input_size(mimetype);
> + UT_DEBUGMSG(("Opened 'mimetype' file\n"));
> + size_t size = gsf_input_size(mimetype);
>
> if (size > 0)
> {
> - gchar* mime = (gchar*)gsf_input_read(mimetype, size, NULL);
> + UT_DEBUGMSG(("Reading 'mimetype' file contents\n"));
> + gchar* pMime = (gchar*)gsf_input_read(mimetype, size, NULL);
> + UT_UTF8String mimeStr;
> + mimeStr.append(pMime, size);
>
> - if (!strcmp(mime, EPUB_MIMETYPE))
> + if (!strcmp(mimeStr.utf8_str(), EPUB_MIMETYPE))
> {
> + UT_DEBUGMSG(("RUDYJ: Found EPUB\n"));
> confidence = UT_CONFIDENCE_PERFECT;
> - }
> - g_free(mime);
> + }
> }
> - }
> +
> + g_object_unref(G_OBJECT(mimetype));
> + }
> +
> + g_object_unref(G_OBJECT(zip));
> }
>
> return confidence;
> @@ -88,7 +99,9 @@
> UT_Error IE_Imp_EPUB_Sniffer::constructImporter(PD_Document * pDocument,
> IE_Imp ** ppie)
> {
> - *ppie = new IE_Imp_EPUB(pDocument);
> + UT_DEBUGMSG(("Constructing importer\n"));
> + IE_Imp_EPUB* importer = new IE_Imp_EPUB(pDocument);
> + *ppie = importer;
>
> return UT_OK;
> }
>
> Modified: abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.h
> ===================================================================
> --- abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.h 2011-07-02 13:26:48 UTC (rev 29898)
> +++ abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.h 2011-07-02 14:35:43 UTC (rev 29899)
> @@ -21,11 +21,12 @@
> #ifndef IE_IMP_EPUB_SNIFFER_H_
> #define IE_IMP_EPUB_SNIFFER_H_
>
> +#include <gsf/gsf-infile-zip.h>
> #include <gsf/gsf-infile.h>
> -#include <gsf/gsf-infile-zip.h>
> +#include <gsf/gsf-libxml.h>
>
> +#include "ie_imp.h"
> #include "ie_imp_EPUB.h"
> -#include "ie_imp.h"
>
>
> class IE_Imp_EPUB_Sniffer : public IE_ImpSniffer
>
> -----------------------------------------------
> To unsubscribe from this list, send a message to
> abisource-cvs-commit-request@abisource.com with the word
> unsubscribe in the message body.
>

-- 
"I like to pay taxes. With them, I buy civilization." --  Oliver Wendell Holmes

Received on Sat Jul 2 18:02:40 2011

This archive was generated by hypermail 2.1.8 : Sat Jul 02 2011 - 18:02:40 CEST

Re: volodymyr - r29899 - abiword/trunk/plugins/epub/imp/xp