ExtractZip2.py

# coding=utf-8
""" 
ExtractZip.py 
Author: Fredi Hartmann aka ADP 
Startdate: 03/08/2020 
Update:    03/09/2020 
Update:    07/22/2020 
 
Public Domain 
This script comes without any license restrictions. 
 
German Note: 
    Trotzdem das Folgende frei ist, sollte es zum guten Ton gehören, 
    den Autor in der eigenen Arbeit zu erwähnen, wenn man auf dessen 
    Arbeit zurück gegriffen hat. 
 
English note: 
    Even though the following is free, it should be good style 
    to mention the author in one's own work if one has drawn on his work. 
 
     (Online translated via https://www.deepl.com/) 
"""

from __future__ import print_function

import os
import re
import sys
from shutil import copyfileobj
from zipfile import ZipFile, ZipInfo, is_zipfile
from gzip import open as gzopen
import time
import wx

# Whether or not full paths are written to Poser files.
# True writes something like "c:/abc/def/package/content/file.xxx"
# False writes a relative path: "content/file.xxx"
USE_FULLPATH = False
# If full paths are written, the installation path is held in this global
# (set by convert_file() when USE_FULLPATH is True).
INSTALLPATH = ""
# Indentation means: nesting level of curly brackets. Can be used for
# conditional replacements.
INDENTATION = 0
USE_INDENTATION = True
# First word of the line currently being analysed/written. Can be used for
# conditional replacements.
CURRENTKEYWORD = ""
USE_KEYWORD = True
# Assume filenames ending with "z" are gzipped files (*.crz, *.obz, *.fcz, etc).
# NOTE: the triple "S" is a typo in the name, but the name is referenced
# elsewhere in this file and therefore kept as is.
ASSSUME_GZIP = True
# POSERPATH holds the current position inside a Poser object.
# In a conditional replace, POSERPATH == "<actorname>/channels/groups" for example
# says that whatever you are about to change is the name of a groupNode
# (part of groups).
# If POSERPATH == "<actorname>/channels/groups/groupNode", you are going to change
# a parmNode's name.
POSERPATH = None
USE_POSERPATH = True

# Unpack zips found inside a zip file (True) or just copy them (False).
USE_ZIPINZIP = True

# Whether all found paths are replaced (False)
# or only those that reference files inside the package (True).
USE_LIMITED_REPLACEMENTS = True

# Running totals for the zip entries processed by convert_file():
# summed uncompressed size (ZipInfo.file_size) and number of entries.
nr_bytes = 0
nr_files = 0
# Use the real "poser" module when it can be imported; otherwise fall back
# to POSER_FAKE from PoserLibs (presumably a stand-in for running outside
# of Poser -- verify against PoserLibs).
try:
    import poser

    POSERPYTHON = True
except ImportError:
    from PoserLibs import POSER_FAKE as poser

    POSERPYTHON = False

if USE_POSERPATH:
    # POSERPATH tracking is driven by the brace level, so it needs indentation.
    USE_INDENTATION = True

# ----------------------------------------------------------------------------------------
# Keywords of lines in Poser files containing filenames to replace.
poser_keywords = (
    "morphBinaryFile",
    "figureResFile",
    "objFileGeom",
    "textureMap",
    "bumpMap",
    "file",
    "runPythonScript",
)

# ----------------------------------------------------------------------------------------

# Drive letters to replace unconditionally (key = old, value = new).
# Keys should be all lowercase.
drive_substitutes = {"d": "c", "e": "c"}

# ----------------------------------------------------------------------------------------

# Pathnames to replace (key = old, value = new).
# Keys should be all lowercase. Upper/lower case for values is preserved.
# Replacement occurs in lines starting with certain keywords (see above).
path_substitutes = {
    "runtime/textures": "Textures",
    "runtime/libraries": "Libraries",
    "runtime/geometries": "Geometries",
    "runtime/scenes": "Scenes",
}

# ----------------------------------------------------------------------------------------
# Keywords (regular expression fragments) in path/filenames that prevent
# extracting/copying. Upper/lower case is ignored.
excluded_path_keywords = (
    r"/VendorLinks",
    r"\.txt",
    r"\.pdf"
)

# ----------------------------------------------------------------------------------------

# Regular expressions to find things in analysed files.
# group(1) is the drive letter of a path starting with "<letter>:/" or "<letter>:\".
drive_regex = re.compile(
    r"^(" + "|".join(re.escape(letter) for letter in drive_substitutes) + r"):[/\\]",
    re.IGNORECASE)
# group(1) is the keyword, group(2) the rest of the line (the path).
# The group has to stay a capturing group: callers read the path from group(2).
# (The former "(:?" was a typo that allowed a stray ":" before the first keyword.)
poser_keyword_regex = re.compile(r"^\s*(" + "|".join(poser_keywords) + r")\s+(.*)")
# group(1) is the first word of a line.
first_word_regex = re.compile(r"^\s*?(\w+)")
# Regular expression to ignore certain paths/files.
excludepath_regex = re.compile("|".join(excluded_path_keywords), re.IGNORECASE)


# ----------------------------------------------------------------------------------------

def uncompressed_extension(tail):
    """
    Map the extension of a gzipped Poser file to its uncompressed form.

    The lookup ignores upper/lower case. Extensions not listed in the
    table get their last character replaced by "2" (original case kept).
    """
    known_extensions = {
        "obz": "obj",
        "crz": "cr2",
        "fcz": "fc2",
        "hrz": "hr2",
        "mcz": "mc6",
        "p2z": "pz2",
        "ppz": "pp2",
    }
    fallback = tail[:-1] + "2"
    return known_extensions.get(tail.lower(), fallback)


# ----------------------------------------------------------------------------------------


class PoserPath(list):
    """
    A list of path components describing a position inside a Poser object.

    Behaves like a normal list with a few stack/path helpers added.
    """

    def get_path(self):
        """Return all components joined with "/"."""
        return "/".join(map(str, self))

    def push(self, item):
        """Append str(item) unless item is falsy (None, "", 0, ...)."""
        if item:
            # Name the class explicitly: super(self.__class__, self) resolves
            # against the runtime type and misbehaves as soon as this class
            # is subclassed.
            super(PoserPath, self).append(str(item))

    def pop_n(self, n=1):
        """
        Remove up to n components from the end.

        Stops silently when the list is empty. A zero or negative n removes
        nothing (a negative n formerly emptied the whole list).
        """
        while self and n > 0:
            self.pop()
            n -= 1

    def extend_path(self, part):
        """Append every "/"-separated component of part (ignored if falsy)."""
        if part:
            self.extend(part.split("/"))

    def has(self, keyword):
        """Return True if keyword is one of the components."""
        # to be able to use "in" as a function
        return keyword in self

    def __str__(self):
        # map(str, ...) keeps this consistent with get_path() for items that
        # were added through plain list methods and are not strings.
        return "|".join(map(str, self))


# ----------------------------------------------------------------------------------------
#    Sadly Poser-Python (version 2.710, patchlevel from 2017) does not support
#    urllib3, nor any other lib for easy https access.
#    So I will leave this untouched for now. Maybe someone else can look into that.

def download_file(url, report_callback=None):
    """
    Download url to a temporary local file.

    :param url: Address of the file to fetch.
    :param report_callback: Optional progress hook, handed to urlretrieve()
        as "reporthook" (called with block number, block size, total size).
    :return: Name of the local file on success. On failure the caught
        exception instance is returned instead of being raised, so callers
        have to check the type of the result.
    """
    # urlretrieve lives in different modules in Python 2 and Python 3.
    try:
        from urllib import urlretrieve
    except ImportError:
        from urllib.request import urlretrieve

    if report_callback is None:
        def report_callback(block_nr, block_size, total_size):
            return True

    try:
        filename, _headers = urlretrieve(url, reporthook=report_callback)
    except Exception as err:
        # Deliberately broad: any failure is reported through the return value.
        return err
    return filename


def is_url(filename):
    """
    Tell whether filename starts with one of the supported URL schemes
    ("http://" or "ftp://"). The comparison is case-sensitive.
    """
    supported_schemes = ("http://", "ftp://")
    return filename.startswith(supported_schemes)


# ----------------------------------------------------------------------------------------

def collect_filenames(zipfilename):
    """
    Collect the names of all non-empty entries of a zip-file.

    Path separators ("/" and "\\") are replaced by ":" and leading
    separators are removed, so "/Runtime/a.jpg" becomes "Runtime:a.jpg".

    :param zipfilename: Name of the zip-file to inspect.
    :return: List of converted names. The list is empty if the global
        variable USE_LIMITED_REPLACEMENTS is False.
    """
    tmp_list = list()
    if USE_LIMITED_REPLACEMENTS:
        with ZipFile(zipfilename, "r") as zfile:
            for info in zfile.infolist():
                if info.file_size > 0:
                    _path = info.filename.replace("\\", ":").replace("/", ":")
                    # Separators are ":" at this point, so that is what has
                    # to be stripped (the former test for "/" never matched).
                    _path = _path.lstrip(":")
                    if _path:
                        tmp_list.append(_path)
    return tmp_list


def regex_from_list(arg):
    """
    Build a regular expression matching any string that ends with one of
    the given filenames.

    :param arg: Either the name of a zip-file (its content is collected via
        collect_filenames()), an iterable of filenames, or None.
    :return: Compiled regular expression, or None if the global variable
        USE_LIMITED_REPLACEMENTS is False or there are no filenames.
    """
    if not USE_LIMITED_REPLACEMENTS:
        return None

    if isinstance(arg, basestring):
        arg = collect_filenames(arg)
    if arg is None:
        return None
    assert hasattr(arg, "__iter__")

    # Filenames are literal text: escape them, otherwise "." matches any
    # character and names containing "(" or "[" break the expression.
    # Empty names are dropped because an empty alternative matches everything.
    alternatives = [re.escape(name) for name in arg if name]
    if not alternatives:
        return None
    return re.compile(r"(" + "|".join(alternatives) + r")$")


# ----------------------------------------------------------------------------------------

def convert(stream_in, stream_out, replaceable_files=None):
    """
    Copy stream_in line by line to stream_out and rewrite the paths found
    in lines starting with one of the Poser keywords.

    While reading, the globals INDENTATION, CURRENTKEYWORD and POSERPATH
    are updated (depending on the USE_... switches).

    :param stream_in: Iterable yielding the lines to analyse.
    :param stream_out: Object with a write() method receiving the result.
    :param replaceable_files: Passed to regex_from_list(). If that yields a
        regular expression, only matching paths are rewritten; otherwise
        every path found is rewritten.
    """
    global INDENTATION  # number of levels for {} (curly brackets)
    global CURRENTKEYWORD  # word current line starts with
    global POSERPATH

    replaceable_regex = regex_from_list(replaceable_files)
    if USE_POSERPATH:
        POSERPATH = PoserPath()

    def path_correction(_path):
        """Return _path with "/" separators and all substitutions applied."""
        drive_match = drive_regex.match(_path)
        if drive_match:
            # Strings are immutable, so the drive prefix is split off and
            # rebuilt. The lookup is done in lowercase because the regex is
            # case-insensitive while the table keys are lowercase.
            letter = drive_match.group(1)
            drive = drive_substitutes.get(letter.lower(), letter)
            rest = _path[drive_match.end():]
            rest = rest.replace(":", "/").replace("\\", "/").lstrip("/")
            # The colon after the drive letter must survive the ":" -> "/"
            # translation, so it is added back here.
            return drive + ":/" + rest

        # lstrip() also copes with an empty path (indexing [0] did not).
        _path = _path.replace(":", "/").replace("\\", "/").lstrip("/")

        _path_lower = _path.lower()  # to avoid lots of calls to lower()
        for k, v in path_substitutes.items():
            if _path_lower.startswith(k):
                _path = v + _path[len(k):]
                break
        if USE_FULLPATH:
            _path = os.path.abspath(_path)
        return _path

    for line in stream_in:
        last_indent = INDENTATION
        if USE_INDENTATION:
            INDENTATION += line.count("{") - line.count("}")

        if USE_POSERPATH:
            if INDENTATION < last_indent:
                POSERPATH.pop_n(1)
            elif INDENTATION > last_indent:
                # The block just opened belongs to the keyword of the
                # previous line.
                POSERPATH.push(CURRENTKEYWORD)

        if USE_KEYWORD:
            r = first_word_regex.match(line)
            CURRENTKEYWORD = r.group(1) if r else ""

        r = poser_keyword_regex.search(line)
        if r:
            # Keyword found, path is stored in group(2).
            found_str = r.group(2).strip().replace('"', "")
            if not found_str:
                # Nothing to replace; str.replace("", x) would insert x
                # between all characters of the line.
                pass
            elif replaceable_regex is None or replaceable_regex.search(found_str):
                line = line.replace(found_str, path_correction(found_str))
            elif "NO_MAP" not in line:
                print("Left alone:", line)

        stream_out.write(line)


def convert_file(poserfile, newpath, forced_pathnames=None, _nested=False):
    """
    Convert a single poserfile or a zip-file.
    Converted files are created in newpath with their original names
    (poserfile in case of a single file, else the subdirectories/filenames
    contained in the zip-archive).
    Content of certain files is analysed and filenames are replaced to
    point to the new path.

    :param poserfile: Name of a Poser file or zip-file, or a URL accepted by
        is_url() (the file is downloaded first).
    :param newpath: Directory receiving the result. Created if missing.
    :param forced_pathnames: Optional list of additional filenames whose
        references may be replaced (used for zips inside zips).
    :param _nested: Internal. True while a zip found inside another zip is
        processed, so the global counters nr_files/nr_bytes keep counting.
    :return: None in the normal case; an exception instance if a directory
        could not be created or a download failed.

    Side effects: the working directory is changed to newpath while working
    and to the script directory (or, if that is unknown, the former working
    directory) afterwards.
    """

    global INDENTATION  # number of levels for {} (curly brackets)
    global nr_files, nr_bytes, INSTALLPATH

    # Absolute, because the working directory is changed further down.
    newpath = os.path.abspath(newpath)

    if USE_FULLPATH:
        INSTALLPATH = newpath
    last_indent = INDENTATION
    INDENTATION = 0

    # Files with these extensions are copied without looking into them.
    copy_only_extensions = frozenset(
        "bmp gif obj jpg jpeg pdf pmd psd png tif txt xmp".split())

    def do_convert(in_name, out_name, just_copy=False, replaceable_files=None):
        """
        Copy or convert one file. in_name/out_name are filenames or open
        files. Returns None on success or the IOError that occurred.
        """
        # Without this declaration the assignment to INDENTATION below
        # would only create a local variable.
        global INDENTATION, CURRENTKEYWORD, POSERPATH

        # All files are handled in binary mode: content may be gzipped or an
        # image, and text mode would alter line endings on Windows.
        if isinstance(in_name, basestring):
            try:
                fh_in = open(in_name, "rb")
            except IOError as err:
                return err
        else:
            fh_in = in_name  # given as open file

        if isinstance(out_name, basestring):
            root, ext = os.path.splitext(out_name)
            if ASSSUME_GZIP and len(ext) > 1 and out_name.endswith("z"):
                # Store the compressed data in a temporary file, then
                # convert the unpacked stream into the final file.
                tmp_name = out_name + ".tmp"
                try:
                    try:
                        with open(tmp_name, "wb") as fh_tmp:
                            copyfileobj(fh_in, fh_tmp)
                    finally:
                        fh_in.close()
                    return do_convert(gzopen(tmp_name, "rb"),
                                      root + "." + uncompressed_extension(ext[1:]),
                                      replaceable_files=replaceable_files)
                except IOError as err:
                    return err
                finally:
                    if os.path.exists(tmp_name):
                        os.remove(tmp_name)
            else:
                try:
                    fh_out = open(out_name, "wb")
                except IOError as err:
                    fh_in.close()
                    return err
        else:
            fh_out = out_name  # given as open file

        try:
            if just_copy:
                copyfileobj(fh_in, fh_out)
            else:
                old = CURRENTKEYWORD, POSERPATH
                try:
                    convert(fh_in, fh_out, replaceable_files)
                finally:
                    CURRENTKEYWORD, POSERPATH = old
        finally:
            # Every file starts with the brace level valid on entry.
            INDENTATION = last_indent
            for handle in (fh_out, fh_in):
                try:
                    handle.close()
                except IOError:
                    pass
        return None

    # end of do_convert()

    if not os.path.isdir(newpath):
        try:
            os.makedirs(newpath)
        except (IOError, OSError):  # makedirs raises OSError
            print("Can't create path '{}'".format(newpath))
            return

    if isinstance(poserfile, basestring) and is_url(poserfile):
        local_file = download_file(poserfile)
        # download_file() returns an exception instance on failure, which
        # must not be handed to os.path.isfile().
        if isinstance(local_file, basestring) and os.path.isfile(local_file):
            return convert_file(local_file, newpath, forced_pathnames, _nested)
        else:
            return local_file  # should contain error message

    if not os.path.exists(poserfile):
        print("Can't find poserfile '{}'".format(poserfile))
        return

    # A relative name would no longer be found after changing directory.
    poserfile = os.path.abspath(poserfile)
    start_dir = os.getcwd()
    os.chdir(newpath)

    try:
        if is_zipfile(poserfile):
            replaceable_files = collect_filenames(poserfile) + list(forced_pathnames or [])

            if not _nested:
                nr_files = nr_bytes = 0
            with ZipFile(poserfile, "r") as zfile:
                for info in zfile.infolist():  # type: ZipInfo
                    new_filename = re.sub(r"runtime[:/\\]", "", info.filename, flags=re.IGNORECASE)
                    if excludepath_regex.search(new_filename):
                        # skip unwanted content
                        continue

                    if USE_ZIPINZIP and info.filename.lower().endswith(".zip"):
                        res = zfile.extract(info, newpath)
                        try:
                            convert_file(res, os.path.dirname(res), replaceable_files, _nested=True)
                        finally:
                            os.remove(res)
                            # The nested call changed the working directory.
                            os.chdir(newpath)
                    elif info.file_size > 0:
                        new_filename = os.path.abspath(os.path.join(newpath, new_filename))
                        # Archives are untrusted input: entries with absolute
                        # names or ".." must not end up outside of newpath.
                        if not new_filename.startswith(os.path.join(newpath, "")):
                            print("Skipped unsafe path:", info.filename)
                            continue

                        nr_files += 1
                        nr_bytes += info.file_size
                        p = os.path.dirname(new_filename)
                        if not os.path.isdir(p):
                            try:
                                os.makedirs(p)
                            except (IOError, OSError) as err:
                                return err
                        # Real membership test (the former substring test
                        # also accepted fragments like "p" or "if").
                        ext = os.path.splitext(new_filename)[1][1:]
                        just_copy = ext.lower() in copy_only_extensions

                        res = do_convert(zfile.open(info, "r"), new_filename, just_copy, replaceable_files)
                        if isinstance(res, IOError):
                            print(res)
                            break
        else:
            fname = os.path.basename(poserfile)
            res = do_convert(poserfile, os.path.join(newpath, fname))
            if isinstance(res, IOError):
                print(res)
    finally:
        # Also reached on early returns. dirname() is empty if the script
        # was started without a path; os.chdir("") would raise.
        os.chdir(os.path.dirname(sys.argv[0]) or start_dir)


# Settings remembered between runs. Inside Poser the dictionary is stored in
# globals() under the name of the script; outside of Poser a wx.App is
# created and defaults are used.
if POSERPYTHON:
    TEMP_CONFIG = globals().setdefault(os.path.basename(sys.argv[0]), dict())
else:
    app = wx.App()
    # NOTE(review): these look like paths of the author's machine -- confirm
    # that the dialogs cope with non-existing default directories.
    TEMP_CONFIG = dict(last_zip_dir="/home/fredi/Downloads",
                       last_export_dir="/home/fredi/Converter_test/")

MYPATH = os.path.dirname(sys.argv[0])
if __name__ == "__main__":
    t = None  # start time of the conversion; None means "aborted by user"

    with wx.FileDialog(None, "Open ZIP File",
                       wildcard="ZIP files (*.zip)|*.zip",
                       defaultDir=TEMP_CONFIG.get("last_zip_dir", MYPATH),
                       style=wx.FD_OPEN | wx.FD_FILE_MUST_EXIST) as dlg:
        if dlg.ShowModal() != wx.ID_CANCEL:
            zip_filename = dlg.GetPath()
            TEMP_CONFIG["last_zip_dir"] = os.path.dirname(zip_filename)

            # Own name for the second dialog, so the first one is not shadowed.
            with wx.DirDialog(None, "Choose directory",
                              defaultPath=TEMP_CONFIG.get("last_export_dir", os.path.dirname(zip_filename)),
                              style=wx.DD_DEFAULT_STYLE) as dir_dlg:
                if dir_dlg.ShowModal() != wx.ID_CANCEL:
                    extract_path = dir_dlg.GetPath()
                    TEMP_CONFIG["last_export_dir"] = extract_path

                    t = time.time()
                    convert_file(zip_filename, extract_path)

    # The summary belongs to the script run: "t" only exists in here, so at
    # module level it raised a NameError when the file was imported.
    if t is not None:
        print("Done in", round(time.time() - t, 2), "Seconds.")
        print(round(float(nr_bytes) / 1000 / 1000, 2), "Megabytes written to", nr_files, "files.")
    else:
        print("Aborted.")