Re: volodymyr - r29899 - abiword/trunk/plugins/epub/imp/xp

From: Dominic Lachowicz <domlachowicz_at_gmail.com>
Date: Sat Jul 02 2011 - 18:02:33 CEST

You need X11 in an importer?

On Sat, Jul 2, 2011 at 10:35 AM, <cvs@abisource.com> wrote:
>
> Author: volodymyr
> Date: 2011-07-02 16:35:43 +0200 (Sat, 02 Jul 2011)
> New Revision: 29899
>
> Modified:
>   abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.cpp
>   abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.h
>   abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.cpp
>   abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.h
> Log:
> The EPUB import plugin can now import EPUB files. Containers that have several OPS XHTML files are also supported.
>
>
> Modified: abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.cpp
> ===================================================================
> --- abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.cpp   2011-07-02 13:26:48 UTC (rev 29898)
> +++ abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.cpp   2011-07-02 14:35:43 UTC (rev 29899)
> @@ -18,6 +18,10 @@
>  * 02111-1307, USA.
>  */
>
> +#include <X11/X.h>
> +#include <stdexcept>
> +#include <zlib.h>
> +
>  #include "ie_imp_EPUB.h"
>
>  IE_Imp_EPUB::IE_Imp_EPUB(PD_Document* pDocument) : IE_Imp(pDocument)
> @@ -37,6 +41,353 @@
>
>  UT_Error IE_Imp_EPUB::_loadFile(GsfInput* input)
>  {
> +    m_epub = gsf_infile_zip_new(input, NULL);
>
> +    if (m_epub == NULL)
> +    {
> +        UT_DEBUGMSG(("Can`t create gsf input zip object\n"));
> +        return UT_ERROR;
> +    }
> +
> +
> +    UT_DEBUGMSG(("Reading metadata\n"));
> +    if (readMetadata() != UT_OK)
> +    {
> +        UT_DEBUGMSG(("Failed to read metadata\n"));
> +        return UT_ERROR;
> +    }
> +
> +    UT_DEBUGMSG(("Reading package information\n"));
> +    if (readPackage() != UT_OK)
> +    {
> +        UT_DEBUGMSG(("Failed to read package information\n"));
> +        return UT_ERROR;
> +    }
> +
> +    UT_DEBUGMSG(("Uncompressing OPS data\n"));
> +    if (uncompress() != UT_OK)
> +    {
> +        UT_DEBUGMSG(("Failed to uncompress data\n"));
> +        return UT_ERROR;
> +    }
> +
> +    UT_DEBUGMSG(("Reading OPS data\n"));
> +    if (readStructure() != UT_OK)
> +    {
> +        UT_DEBUGMSG(("Failed to read OPS data\n"));
> +        return UT_ERROR;
> +    }
> +
> +    return UT_OK;
> +
>  }
>
> +UT_Error IE_Imp_EPUB::readMetadata()
> +{
> +    GsfInput* metaInf = gsf_infile_child_by_name(m_epub, "META-INF");
> +
> +    if (metaInf == NULL)
> +    {
> +        UT_DEBUGMSG(("Can`t open container META-INF dir\n"));
> +        return UT_ERROR;
> +    }
> +
> +    GsfInput* meta = gsf_infile_child_by_name(GSF_INFILE(metaInf), "container.xml");
> +
> +    if (meta == NULL)
> +    {
> +        UT_DEBUGMSG(("Can`t open container metadata\n"));
> +        return UT_ERROR;
> +    }
> +
> +    size_t metaSize = gsf_input_size(meta);
> +
> +    if (metaSize == 0)
> +    {
> +        UT_DEBUGMSG(("Container metadata file is empty\n"));
> +        return UT_ERROR;
> +    }
> +
> +    gchar* metaXml = (gchar*)gsf_input_read(meta, metaSize, NULL);
> +
> +
> +    UT_UTF8String rootfilePath;
> +    UT_XML metaParser;
> +    ContainerListener containerListener;
> +    metaParser.setListener(&containerListener);
> +
> +    if (metaParser.sniff(metaXml, metaSize, "container"))
> +    {
> +        UT_DEBUGMSG(("Parsing container.xml file\n"));
> +        metaParser.parse(metaXml, metaSize);
> +    } else
> +    {
> +        UT_DEBUGMSG(("Incorrect container.xml file\n"));
> +        return UT_ERROR;
> +    }
> +
> +    m_rootfilePath = containerListener.getRootFilePath();
> +
> +    g_object_unref(G_OBJECT(meta));
> +    g_object_unref(G_OBJECT(metaInf));
> +
> +    return UT_OK;
> +}
> +
> +UT_Error IE_Imp_EPUB::readPackage()
> +{
> +    gchar **aname = g_strsplit(m_rootfilePath.utf8_str(), G_DIR_SEPARATOR_S, 0);
> +    GsfInput* opf = gsf_infile_child_by_aname(m_epub, (const char**)aname);
> +
> +    UT_DEBUGMSG(("Getting parent\n"));
> +    GsfInfile* opfParent = gsf_input_container(opf);
> +    m_opsDir = UT_UTF8String(gsf_input_name(GSF_INPUT(opfParent)));
> +
> +    UT_DEBUGMSG(("OPS dir: %s\n", m_opsDir.utf8_str()));
> +
> +    if (opf == NULL){
> +        UT_DEBUGMSG(("Can`t open .opf file\n"));
> +        return UT_ERROR;
> +    }
> +
> +    size_t opfSize = gsf_input_size(opf);
> +    gchar* opfXml = (gchar*)gsf_input_read(opf, opfSize, NULL);
> +
> +    UT_XML opfParser;
> +    OpfListener opfListener;
> +    opfParser.setListener(&opfListener);
> +    if (opfParser.sniff(opfXml, opfSize, "package"))
> +    {
> +        UT_DEBUGMSG(("Parsing opf file\n"));
> +        opfParser.parse(opfXml, opfSize);
> +    } else
> +    {
> +        UT_DEBUGMSG(("Incorrect opf file found \n"));
> +        return UT_ERROR;
> +    }
> +
> +    g_strfreev(aname);
> +    g_object_unref(G_OBJECT(opf));
> +    //g_object_unref(G_OBJECT(opfParent));
> +
> +    m_spine = opfListener.getSpine();
> +    m_manifestItems = opfListener.getManifestItems();
> +
> +    return UT_OK;
> +}
> +
> +UT_Error IE_Imp_EPUB::uncompress()
> +{
> +    m_tmpDir = UT_go_filename_to_uri(g_get_tmp_dir());
> +    m_tmpDir += G_DIR_SEPARATOR_S;
> +    m_tmpDir += getDoc()->getDocUUIDString();
> +
> +    if (!UT_go_directory_create(m_tmpDir.utf8_str(), 0644, NULL))
> +    {
> +        UT_DEBUGMSG(("Can`t create temporary directory\n"));
> +        return UT_ERROR;
> +    }
> +     GsfInput *opsDirInput = gsf_infile_child_by_name(m_epub, m_opsDir.utf8_str());
> +     UT_DEBUGMSG(("Child count : %d", gsf_infile_num_children(m_epub)));
> +     if (opsDirInput == NULL)
> +     {
> +        UT_DEBUGMSG(("Failed to open OPS dir\n"));
> +        return UT_ERROR;
> +     }
> +
> +    for(std::map<UT_UTF8String, UT_UTF8String>::iterator i = m_manifestItems.begin(); i != m_manifestItems.end(); i++)
> +    {
> +        gchar *itemFileName = UT_go_filename_from_uri((m_tmpDir + G_DIR_SEPARATOR_S + (*i).second).utf8_str());
> +        gchar** aname = g_strsplit((*i).second.utf8_str(), G_DIR_SEPARATOR_S, 0);
> +
> +
> +        GsfInput* itemInput = gsf_infile_child_by_aname(GSF_INFILE(opsDirInput), (const char**)aname);
> +        GsfOutput* itemOutput = createFileByPath(itemFileName);
> +        gsf_input_seek(itemInput, 0, G_SEEK_SET);
> +        gsf_input_copy(itemInput, itemOutput);
> +        g_strfreev(aname);
> +        g_free(itemFileName);
> +        g_object_unref(G_OBJECT(itemInput));
> +        gsf_output_close(itemOutput);
> +    }
> +
> +    g_object_unref(G_OBJECT(opsDirInput));
> +
> +
> +    return UT_OK;
> +}
> +
> +UT_Error IE_Imp_EPUB::readStructure()
> +{
> +    getDoc()->createRawDocument();
> +    getDoc()->finishRawCreation();
> +
> +    for(std::vector<UT_UTF8String>::iterator i = m_spine.begin(); i != m_spine.end(); i++)
> +    {
> +        try
> +        {
> +
> +            UT_UTF8String itemPath = m_tmpDir + G_DIR_SEPARATOR_S + m_manifestItems.at(*i);
> +            PT_DocPosition posEnd = 0;
> +            getDoc()->getBounds(true, posEnd);
> +
> +            GsfInput* itemInput = UT_go_file_open(itemPath.utf8_str(), NULL);
> +            size_t inputSize = gsf_input_size(itemInput);
> +            gchar* inputData = (gchar*)gsf_input_read(itemInput, inputSize, NULL);
> +
> +            PD_Document *currentDoc = new PD_Document();
> +            currentDoc->createRawDocument();
> +            const char *suffix = strchr(itemPath.utf8_str(), '.');
> +            currentDoc->importFile(itemPath.utf8_str(),
> +                                   IE_Imp::fileTypeForSuffix(suffix), true, false, NULL);
> +            currentDoc->finishRawCreation();
> +
> +            IE_Imp_PasteListener * pPasteListener = new  IE_Imp_PasteListener(getDoc(),posEnd, currentDoc);
> +            currentDoc->tellListener(static_cast<PL_Listener *>(pPasteListener));
> +
> +            DELETEP(pPasteListener);
> +            UNREFP(currentDoc);
> +            g_object_unref(G_OBJECT(itemInput));
> +
> +        } catch (std::out_of_range e)
> +        {
> +            return UT_ERROR;
> +        }
> +    }
> +
> +        return UT_OK;
> +}
> +
> +GsfOutput* IE_Imp_EPUB::createFileByPath(const char* path)
> +{
> +    gchar** components = g_strsplit(path, G_DIR_SEPARATOR_S, 0);
> +    UT_UTF8String curPath = UT_UTF8String(components[0]);
> +
> +    int current = 0;
> +    GsfOutput* output = NULL;
> +    while (components[current] != NULL)
> +    {
> +        curPath += components[current];
> +        current++;
> +
> +        char *uri = UT_go_filename_to_uri(curPath.utf8_str());
> +        bool fileExists = UT_go_file_exists(uri);
> +        if (!fileExists &&  (components[current] != NULL))
> +        {
> +            UT_go_directory_create(uri, 0644, NULL);
> +        } else
> +        {
> +            if (!fileExists)
> +            {
> +                output = UT_go_file_create(uri, NULL);
> +                break;
> +            }
> +        }
> +
> +        g_free(uri);
> +
> +        if (components[current] != NULL)
> +        {
> +            curPath += G_DIR_SEPARATOR_S;
> +        }
> +    }
> +
> +    g_strfreev(components);
> +    return output;
> +}
> +
> +void ContainerListener::startElement(const gchar* name, const gchar** atts)
> +{
> +    if (!UT_go_utf8_collate_casefold(name, "rootfile"))
> +    {
> +        m_rootFilePath = UT_UTF8String(UT_getAttribute("full-path", atts));
> +        UT_DEBUGMSG(("Found rootfile%s\n", m_rootFilePath.utf8_str()));
> +    }
> +}
> +
> +void ContainerListener::endElement(const gchar* name)
> +{
> +}
> +
> +void ContainerListener::charData(const gchar* buffer, int length)
> +{
> +
> +}
> +
> +UT_UTF8String ContainerListener::getRootFilePath() const
> +{
> +    return m_rootFilePath;
> +}
> +
> +/*
> +
> + */
> +
> +OpfListener::OpfListener():
> +        m_inManifest(false)
> +{
> +
> +}
> +
> +void OpfListener::startElement(const gchar* name, const gchar** atts)
> +{
> +    if (!UT_go_utf8_collate_casefold(name, "manifest"))
> +    {
> +        m_inManifest = true;
> +    }
> +
> +    if (!UT_go_utf8_collate_casefold(name, "spine"))
> +    {
> +        m_inSpine = true;
> +    }
> +
> +    if (m_inManifest)
> +    {
> +        if (!UT_go_utf8_collate_casefold(name, "item"))
> +        {
> +            m_manifestItems.insert(string_pair(UT_UTF8String(UT_getAttribute("id", atts)),
> +                                               UT_UTF8String(UT_getAttribute("href", atts))));
> +            UT_DEBUGMSG(("Found manifest item: %s\n", UT_getAttribute("href", atts)));
> +        }
> +    }
> +
> +    if (m_inSpine)
> +    {
> +        if (!UT_go_utf8_collate_casefold(name, "itemref"))
> +        {
> +            // We can ignore the "linear" attribute, as the specification says
> +            m_spine.push_back(UT_UTF8String(UT_getAttribute("idref", atts)));
> +            UT_DEBUGMSG(("Found spine itemref: %s\n", UT_getAttribute("idref", atts)));
> +        }
> +    }
> +
> +}
> +
> +void OpfListener::endElement(const gchar* name)
> +{
> +
> +}
> +
> +void OpfListener::charData(const gchar* buffer, int length)
> +{
> +
> +}
> +
> +/*
> +
> + */
> +
> +void NavigationListener::startElement(const gchar* name, const gchar** atts)
> +{
> +
> +}
> +
> +void NavigationListener::endElement(const gchar* name)
> +{
> +
> +}
> +
> +void NavigationListener::charData(const gchar* buffer, int length)
> +{
> +
> +}
> \ No newline at end of file
>
> Modified: abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.h
> ===================================================================
> --- abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.h     2011-07-02 13:26:48 UTC (rev 29898)
> +++ abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.h     2011-07-02 14:35:43 UTC (rev 29899)
> @@ -21,28 +21,108 @@
>  #ifndef IE_IMP_EPUB_H_
>  #define IE_IMP_EPUB_H_
>
> -#include "ie_imp.h"
> +#include <gsf/gsf-infile-zip.h>
> +#include <gsf/gsf-infile.h>
> +#include <gsf/gsf-libxml.h>
> +#include <ut_go_file.h>
> +#include <vector>
> +#include <map>
>
> +
> +// AbiWord includes
> +#include <ie_imp.h>
> +#include <ie_imp_XHTML.h>
> +#include <ut_xml.h>
> +#include <ie_imp_PasteListener.h>
> +
>  #define EPUB_MIMETYPE "application/epub+zip"
>
>
> +typedef std::pair<UT_UTF8String, UT_UTF8String> string_pair;
>  /**
>  * Class used to import EPUB files
>  */
>  class IE_Imp_EPUB : public IE_Imp
>  {
>  public:
> -
>     IE_Imp_EPUB (PD_Document * pDocument);
>     virtual ~IE_Imp_EPUB ();
> -   virtual bool   pasteFromBuffer(PD_DocumentRange * pDocRange,
> +    virtual bool   pasteFromBuffer(PD_DocumentRange * pDocRange,
>                                const unsigned char * pData,
>                                UT_uint32 lenData,
>                                const char * szEncoding = 0);
> -
>  protected:
> -    virtual UT_Error _loadFile(GsfInput * input);
> +     virtual UT_Error _loadFile(GsfInput * input);
> +
> +private:
> +    GsfInfile* m_epub;
> +    UT_UTF8String m_rootfilePath;
> +    UT_UTF8String m_tmpDir;
> +    UT_UTF8String m_opsDir;
> +    std::vector<UT_UTF8String> m_spine;
> +    std::map<UT_UTF8String, UT_UTF8String> m_manifestItems;
> +
> +    UT_Error readMetadata();
> +    UT_Error readPackage();
> +    UT_Error uncompress();
> +    UT_Error readStructure();
> +    static GsfOutput* createFileByPath(const char* path);
>  };
>
> +/*
> + * Listener for parsing container.xml data
> + */
> +class ContainerListener : public UT_XML::Listener
> +{
> +public:
> +      void startElement (const gchar * name, const gchar ** atts);
> +      void endElement (const gchar * name);
> +      void charData (const gchar * buffer, int length);
> +
> +      UT_UTF8String getRootFilePath() const;
> +
> +private:
> +    UT_UTF8String m_rootFilePath;
> +};
> +
> +/*
> + * Listener for parsing .opf
> + */
> +class OpfListener : public UT_XML::Listener
> +{
> +public:
> +      void startElement (const gchar * name, const gchar ** atts);
> +      void endElement (const gchar * name);
> +      void charData (const gchar * buffer, int length);
> +
> +      std::map<UT_UTF8String, UT_UTF8String> getManifestItems() const { return  m_manifestItems; }
> +      std::vector<UT_UTF8String> getSpine() const { return m_spine; }
> +
> +      OpfListener();
> +
> +private:
> +    /* Vector with list of OPS files needed to be imported. Sorted in the linear
> +     * reading order
> +     */
> +    std::vector<UT_UTF8String> m_spine;
> +    /* Map with all files that will be used for import
> +     */
> +    std::map<UT_UTF8String, UT_UTF8String> m_manifestItems;
> +
> +    bool m_inManifest;
> +    bool m_inSpine;
> +};
> +
> +/*
> + * Listener for parsing .ncx
> + */
> +class NavigationListener : public UT_XML::Listener
> +{
> +public:
> +      void startElement (const gchar * name, const gchar ** atts);
> +      void endElement (const gchar * name);
> +      void charData (const gchar * buffer, int length);
> +};
> +
>  #endif
>
>
> Modified: abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.cpp
> ===================================================================
> --- abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.cpp   2011-07-02 13:26:48 UTC (rev 29898)
> +++ abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.cpp   2011-07-02 14:35:43 UTC (rev 29899)
> @@ -39,7 +39,7 @@
>  IE_Imp_EPUB_Sniffer::IE_Imp_EPUB_Sniffer() :
>        IE_ImpSniffer("EPUB::EPUB")
>  {
> -
> +    UT_DEBUGMSG(("Constructing sniffer\n"));
>  }
>
>  IE_Imp_EPUB_Sniffer::~IE_Imp_EPUB_Sniffer()
> @@ -49,16 +49,19 @@
>
>  const IE_SuffixConfidence * IE_Imp_EPUB_Sniffer::getSuffixConfidence()
>  {
> +    UT_DEBUGMSG(("Recognizing suffixes\n"));
>     return IE_Imp_EPUB_Sniffer_SuffixConfidence;
>  }
>
>  const IE_MimeConfidence * IE_Imp_EPUB_Sniffer::getMimeConfidence()
>  {
> -       return IE_Imp_EPUB_Sniffer_MimeConfidence;
> +    UT_DEBUGMSG(("Recognizing mime type\n"));
> +    return IE_Imp_EPUB_Sniffer_MimeConfidence;
>  }
>
>  UT_Confidence_t IE_Imp_EPUB_Sniffer::recognizeContents(GsfInput * input)
>  {
> +    UT_DEBUGMSG(("Recognizing contents\n"));
>     GsfInfile* zip = gsf_infile_zip_new(input, NULL);
>     UT_Confidence_t confidence = UT_CONFIDENCE_ZILCH;
>     if (zip != NULL)
> @@ -67,19 +70,27 @@
>
>         if (mimetype != NULL)
>         {
> -            gsf_off_t size = gsf_input_size(mimetype);
> +            UT_DEBUGMSG(("Opened 'mimetype' file\n"));
> +            size_t size = gsf_input_size(mimetype);
>
>             if (size > 0)
>             {
> -                gchar* mime = (gchar*)gsf_input_read(mimetype, size, NULL);
> +                UT_DEBUGMSG(("Reading 'mimetype' file contents\n"));
> +                gchar* pMime = (gchar*)gsf_input_read(mimetype, size, NULL);
> +                UT_UTF8String mimeStr;
> +                mimeStr.append(pMime, size);
>
> -                if (!strcmp(mime, EPUB_MIMETYPE))
> +                if (!strcmp(mimeStr.utf8_str(), EPUB_MIMETYPE))
>                 {
> +                    UT_DEBUGMSG(("RUDYJ: Found EPUB\n"));
>                     confidence = UT_CONFIDENCE_PERFECT;
> -                }
> -                g_free(mime);
> +                }
>             }
> -        }
> +
> +            g_object_unref(G_OBJECT(mimetype));
> +        }
> +
> +        g_object_unref(G_OBJECT(zip));
>     }
>
>     return confidence;
> @@ -88,7 +99,9 @@
>  UT_Error IE_Imp_EPUB_Sniffer::constructImporter(PD_Document * pDocument,
>                IE_Imp ** ppie)
>  {
> -    *ppie = new IE_Imp_EPUB(pDocument);
> +    UT_DEBUGMSG(("Constructing importer\n"));
> +    IE_Imp_EPUB* importer = new IE_Imp_EPUB(pDocument);
> +    *ppie = importer;
>
>     return UT_OK;
>  }
>
> Modified: abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.h
> ===================================================================
> --- abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.h     2011-07-02 13:26:48 UTC (rev 29898)
> +++ abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.h     2011-07-02 14:35:43 UTC (rev 29899)
> @@ -21,11 +21,12 @@
>  #ifndef IE_IMP_EPUB_SNIFFER_H_
>  #define IE_IMP_EPUB_SNIFFER_H_
>
> +#include <gsf/gsf-infile-zip.h>
>  #include <gsf/gsf-infile.h>
> -#include <gsf/gsf-infile-zip.h>
> +#include <gsf/gsf-libxml.h>
>
> +#include "ie_imp.h"
>  #include "ie_imp_EPUB.h"
> -#include "ie_imp.h"
>
>
>  class IE_Imp_EPUB_Sniffer : public IE_ImpSniffer
>
> -----------------------------------------------
> To unsubscribe from this list, send a message to
> abisource-cvs-commit-request@abisource.com with the word
> unsubscribe in the message body.
>

-- 
"I like to pay taxes. With them, I buy civilization." --  Oliver Wendell Holmes
Received on Sat Jul 2 18:02:40 2011

This archive was generated by hypermail 2.1.8 : Sat Jul 02 2011 - 18:02:40 CEST