# excess.org / back to mkzip / mkzip.py download
#!/usr/bin/python

# mkzip.py
# Copyright (c) 2002, 2003  Ian Ward
#
# This module is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.


# Release History
#
# 1.2   2003-09-15
#       New Features contributed by David McNab <david@rebirthing.co.nz>:
#       zip strings in lieu of files on the filesystem
#       use python 1.5.2 compatible operators
#
# 1.1   2003-01-02
#       Bugfix: used source name instead of dest name in zipfile length
#       calculation
#
# 1.0   2002-12-23
#       First release



"""
mkzip.py

Create a zip file from files stored in the filesystem.  Resulting
zip file length may be calculated from the source file sizes.

Compression is not supported -- files are stored.

mkzip.py homepage: http://excess.org/mkzip/
"""


# Message written to stderr by main() when no file names are given on the
# command line.
_usage = """
Usage: mkzip.py (file1) (file2) ... > (output.zip)
"""

import sys
import os # for stat
import zlib # for crc32
import time # for localtime
import StringIO # for when input files are passed in as strings

# Number of bytes requested from an input file per read() call in store_file().
BUF_SIZE = 4096

def main():
        """
        main()

        Command line entry point.  Every command line argument is the
        name of a file to store.  The expected archive size is reported
        on stderr and the zip file itself is written to stdout.  When no
        file names are given the usage message is written to stderr and
        nothing else happens.
        """
        names = sys.argv[1:]
        if not names:
                sys.stderr.write( _usage )
                return

        sys.stderr.write("Expected size: %d bytes\n" % size_of_zip(names))

        if sys.platform == "win32":
                # stdout is opened in text mode on Windows; without this
                # every "\n" byte in the archive would be expanded to
                # "\r\n" and the resulting zip file would be corrupt
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)

        create_zip( sys.stdout, names )


def create_zip( o, files ):
        """
        create_zip( o, files )

        o is the output object; only its write method is used.
        files is a list whose items are each one of:
          - a filename (stored in the archive under the same name)
          - a (srcfile, destfile) tuple
          - a (None, destfile, srcstring) tuple, where srcstring holds
            the data to store instead of a file on the filesystem
        destfile is the name for the file in the zip archive.

        Writes a zip file containing files using o.write: first every
        stored file, then one central directory record per file, then
        the end of central directory record.
        """
        tuple_type = type(())
        records = []
        offset = 0
        for entry in files:
                if type(entry) == tuple_type and entry[0] is None:
                        srcfile, destfile, srcstring = entry
                        f = StringIO.StringIO(srcstring)
                else:
                        if type(entry) == tuple_type:
                                srcfile, destfile = entry
                        else:
                                srcfile = destfile = entry
                        f = open( srcfile, 'rb' )
                try:
                        cdatetime, ccrc32, cflength, total_length = store_file(
                                o, destfile, f)
                finally:
                        # release the input handle even when reading the
                        # source or writing to o fails part way through
                        f.close()
                # offset recorded here is where this file's header begins
                records.append( (destfile, cdatetime, ccrc32, cflength, offset) )
                offset = offset + total_length

        # the central directory starts right after the last stored file
        cdiroffset = offset
        cdirlen = 0
        for destfile, cdatetime, ccrc32, cflength, f_offset in records:
                cdirlen = cdirlen + central_dir_rec(
                        o, destfile, cdatetime, ccrc32, cflength, f_offset)

        central_dir_end(o, len(records), cdirlen, cdiroffset)




def size_of_zip( files ):
        """
        size_of_zip( files ) -> size in bytes

        files is a list whose items are each a filename, a
        (srcfile, destfile) tuple or a (None, destfile, srcstring) tuple.
        destfile is the name for the file in the zip archive.

        Returns the byte size of a zip file that contains files, without
        writing anything.  Source files are only inspected for their
        size; source strings contribute their length.
        """
        total = 0
        for spec in files:
                is_tuple = type(spec) == type(())
                if is_tuple and spec[0] is None:
                        # data supplied directly as a string
                        srcfile, destfile, srcstring = spec
                        fsize = len(srcstring)
                elif is_tuple:
                        srcfile, destfile = spec
                        fsize = file_size( srcfile )
                else:
                        destfile = spec
                        fsize = file_size( spec )

                # every file costs one stored entry plus one directory record
                total = total + store_file_length( destfile, fsize )
                total = total + central_dir_rec_length( destfile )

        return total + central_dir_end_length()





##
## worker functions
##


def store_file( o, fname, f ):
        """
        store_file( o, fname, f ) -> cdatetime, crc32, flength, total_length
        o = output file handle
        fname = name of file in zip
        f = input file handle

        The local file header, the file name, the file contents and a
        trailing crc/size/size record are written to o.

        cdatetime, crc32 and flength are returned as 4 byte little
        endian strings; total_length is the number of bytes written,
        as an integer.
        """

        cdatetime = int_to_le(dos_date_time(), 4)
        cfnamelen = int_to_le(len(fname), 2)

        # The crc and sizes are not known until the contents have been
        # read, so the header carries zeros and the real values are
        # written after the file data.
        header_fields = (
                "PK\x03\x04\n\0\x08\0\0\0", # signature, version, flags, method
                cdatetime,
                "\0\0\0\0", # crc (real value stored later)
                "\0\0\0\0", # "compressed" size (real value stored later)
                "\0\0\0\0", # "uncompressed" size (real value stored later)
                cfnamelen,
                "\0\0", # no extra field
                fname,
        )
        for field in header_fields:
                o.write( field )

        # copy the contents through in BUF_SIZE pieces, tracking the
        # running length and crc as we go
        length = 0
        crc = zlib.crc32("")
        while 1:
                buf = f.read( BUF_SIZE )
                if buf == '':
                        break
                o.write( buf )
                length = length + len( buf )
                crc = zlib.crc32( buf, crc )

        ccrc32 = int_to_le(crc, 4)
        cflength = int_to_le(length, 4)

        # crc, "compressed" size, "uncompressed" size -- files are stored
        # uncompressed so both sizes are the same
        for field in (ccrc32, cflength, cflength):
                o.write( field )

        return cdatetime, ccrc32, cflength, store_file_length(fname, length)

def store_file_length(fname, flength):
        """
        store_file_length( fname, flength ) -> length
        fname = name of file in zip
        flength = length of the file contents in bytes

        Returns the number of bytes store_file writes for such a file.
        """
        # fixed part written before the name: 10 byte constant, date/time,
        # crc, two sizes, name length, extra field length
        header = 10 + 4 + 4 + 4 + 4 + 2 + 2
        # crc and two sizes written after the contents
        trailer = 4 + 4 + 4
        return header + len(fname) + flength + trailer


def central_dir_rec(o, fname, cdatetime, crc32, flength, offset):
        """
        central_dir_rec( o, fname, cdatetime, crc32, flength, offset ) -> length
        o = output file handle
        fname = name of file in zip
        cdatetime = dos date/time as 4 byte string
        crc32 = crc as 4 byte string
        flength = file length as 4 byte string
        offset = file offset in zip, as an integer

        One central directory record for the file is written to o.
        The length of the record in bytes is returned.
        """

        # these three must already be encoded as 4 byte strings
        for packed in (crc32, flength, cdatetime):
                assert type(packed)==type("") and len(packed)==4

        cfnamelen = int_to_le(len(fname), 2)
        coffset = int_to_le(offset, 4)

        record_fields = (
                "PK\x01\x02\0\0\n\0\x08\0\0\0", # signature, versions, flags, method
                cdatetime,
                crc32,
                flength, # "compressed" size
                flength, # "uncompressed" size
                cfnamelen,
                "\0\0", # no extra field
                "\0\0", # no file comment
                "\0\0", # disk number start
                "\0\0", # internal attribute
                "\0\0\0\0", # external file attribute
                coffset, # offset to zip data
                fname,
        )
        for field in record_fields:
                o.write( field )

        return central_dir_rec_length(fname)

def central_dir_rec_length(fname):
        """
        central_dir_rec_length( fname ) -> length
        fname = name of file in zip

        Returns the number of bytes central_dir_rec writes for fname.
        """
        # 12 byte constant, date/time, crc, two sizes
        leading = 12 + 4 + 4 + 4 + 4
        # name length, extra length, comment length, disk number,
        # internal attribute, external attribute, offset
        trailing = 2 + 2 + 2 + 2 + 2 + 4 + 4
        return leading + trailing + len(fname)


def central_dir_end(o, count, dirlength, offset ):
        """
        central_dir_end( o, count, dirlength, offset )
        o = output file handle
        count = number of files stored
        dirlength = total length of central dir records
        offset = offset of first central dir record

        The record that ends the central directory is written to o.
        """

        # encode everything first so a value that does not fit is
        # reported before any part of the record has been written
        ccount = int_to_le(count, 2)
        cdirlen = int_to_le(dirlength, 4)
        coffset = int_to_le(offset, 4)

        ending_fields = (
                "PK\x05\x06\0\0\0\0", # signature and two zero disk numbers
                ccount, # the count is written twice
                ccount,
                cdirlen,
                coffset,
                "\0\0", # comment length
        )
        for field in ending_fields:
                o.write( field )

def central_dir_end_length():
        """
        central_dir_end_length() -> length

        Returns the number of bytes central_dir_end writes.
        """
        constant = 8
        counts = 2 + 2
        dir_length_and_offset = 4 + 4
        comment_length = 2
        return constant + counts + dir_length_and_offset + comment_length



##
## utility functions
##

def file_size( f ):
        """
        file_size( f ) -> byte size of file f

        f is the name of a file on the filesystem.
        """
        return os.path.getsize(f)

def int_to_le( val, chars ):
        """
        int_to_le( val, chars ) -> string

        Convert int to a little endian string value with chars characters.

        val = integer to encode; negative values are encoded in two's
              complement form
        chars = number of characters in the result

        An AssertionError is raised when val is not a whole number or
        does not fit in chars characters.
        """

        # repr() rather than backquotes: backquotes are not valid syntax
        # in newer Python versions and would stop the module compiling
        assert int(val) == val, "int_to_le: Invalid parameter %s" % repr(val)

        o = ""
        rest = val
        for ignore in range(chars):
                # emit the lowest 8 bits, then move on to the next 8
                o = o + chr( rest & 0xff )
                rest = rest >> 8

        # what is left must be pure sign extension: 0 for positive
        # values, -1 for negative ones.  The message reports the value
        # that was passed in, not the shifted remainder.
        assert rest == 0 or rest == -1, \
                "insufficient room in %d bytes for %d" % (chars, val)
        return o

def dos_date_time():
        """
        dos_date_time() -> dos style date/time as integer

        The current local time is packed into one integer: the date
        occupies the upper 16 bits (years since 1980, month, day) and
        the time the lower 16 bits (hour, minute, seconds halved).
        """
        year, month, day, hour, minute, second = \
                time.localtime(time.time())[:6]

        idate = ((year-1980) << 9) | (month << 5) | day
        # seconds are stored with two second resolution; shift instead of
        # dividing so the result stays an integer under true division
        itime = (hour << 11) | (minute << 5) | (second >> 1)

        return (idate << 16) | itime




# run main() only when executed as a script, not when imported as a module
if __name__ == "__main__": main()