Description:
This module can be used to split any file, text or binary,
into equal-sized chunks. It can also combine the chunks back
to recreate the original file.
Source: Text Source
""" FileSplitter - Simple Python file split/concat module.
What it does
-==========-
1. Split a text/binary file into equal sized chunks
and save them separately.
2. Concat existing chunks and recreate
original file.
Author: Anand Pillai
Copyright : None, (Public Domain)
"""
import os, sys
class FileSplitterException(Exception):
    """Error raised when a split or join operation cannot be carried out.

    The payload is kept in ``value`` (as before) and is also passed to the
    base ``Exception`` so that ``args``, ``repr()`` and pickling carry it.
    """

    def __init__(self, value):
        # Initialise the base class; without this ``args`` stays empty.
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        """Return the payload rendered as a string."""
        return str(self.value)
def usage():
    """Return the command-line help text.

    The ``-n`` value is the number of chunks to produce (that is how the
    option is parsed), so the placeholder reads ``<numchunks>`` rather than
    the misleading ``<chunksize>``.
    """
    return """\nUsage: FileSplitter.py -i <inputfile> -n <numchunks> [option]\n
Options:\n
-s, --split Split file into chunks
-j, --join Join chunks back to file.
"""
class FileSplitter:
    """Split a file into numbered chunk files, or join such chunks back.

    Chunks are named ``<basename>-<N><postfix>`` (N starting at 1) and are
    written to / read from the current working directory. The code is
    written to run unchanged on Python 2.6+ and Python 3.
    """

    # Size of the in-memory buffer used when copying file data, so that
    # neither splitting nor joining needs a whole chunk (or file) in memory.
    _BLOCKSIZE = 1024 * 1024

    def __init__(self):
        self.__filename = ''
        self.__numchunks = 5
        self.__chunksize = 0
        self.__postfix = ''
        self.__progname = "FileSplitter.py"
        # 0 = split, 1 = join
        self.__action = 0

    def parseOptions(self, args):
        """Configure the splitter from a command-line argument list.

        Recognised options: ``-i <file>``, ``-n <number of chunks>``,
        ``-s``/``--split`` and ``-j``/``--join``.

        A malformed option list is reported on stdout and the method returns
        ``None`` without changing anything. A missing file name or a
        non-integer ``-n`` value terminates the program via ``sys.exit``.
        """
        import getopt

        try:
            # --split and --join are plain flags: no trailing '=' here,
            # otherwise getopt insists on an argument for them.
            optlist, arglist = getopt.getopt(args, 'sji:n:', ["split", "join"])
        except getopt.GetoptError as e:
            print(e)
            return None

        for option, value in optlist:
            if option == '-i':
                self.__filename = value
            elif option == '-n':
                try:
                    self.__numchunks = int(value)
                except ValueError:
                    sys.exit("Error: number of chunks must be an integer")
            elif option in ('-s', '--split'):
                self.__action = 0
            elif option in ('-j', '--join'):
                self.__action = 1

        if not self.__filename:
            sys.exit("Error: filename not given")

    def do_work(self):
        """Run the configured action: split (0) or join (1)."""
        if self.__action == 0:
            self.split()
        elif self.__action == 1:
            self.combine()
        else:
            return None

    def _copy_bytes(self, src, dst, count=None):
        """Copy up to ``count`` bytes (everything if None) from src to dst.

        Data is moved in blocks of at most ``_BLOCKSIZE`` bytes. Returns the
        number of bytes actually copied, which is smaller than ``count`` when
        ``src`` reaches end of file first.
        """
        copied = 0
        while count is None or copied < count:
            if count is None:
                want = self._BLOCKSIZE
            else:
                want = min(self._BLOCKSIZE, count - copied)
            block = src.read(want)
            if not block:
                break
            dst.write(block)
            copied += len(block)
        return copied

    def split(self):
        """Split the file and save the chunks to separate files.

        Every chunk holds ``filesize // numchunks`` bytes except the last,
        which also receives the remainder. Chunk files are created in the
        current working directory.

        Raises FileSplitterException if the number of chunks is below 1 or
        the input file cannot be opened. I/O errors on an individual chunk
        are printed and that chunk is skipped.
        """
        print('Splitting file %s' % self.__filename)
        print('Number of chunks %s \n' % self.__numchunks)

        if self.__numchunks < 1:
            raise FileSplitterException("number of chunks must be at least 1")

        try:
            f = open(self.__filename, 'rb')
        except (OSError, IOError) as e:
            raise FileSplitterException(str(e))

        bname = os.path.split(self.__filename)[1]

        # 'with' guarantees the input file is closed on every exit path.
        with f:
            fsize = os.path.getsize(self.__filename)
            # Integer division stays exact for files too large for a float.
            self.__chunksize = fsize // self.__numchunks
            chunksz = self.__chunksize
            total_bytes = 0

            for x in range(self.__numchunks):
                chunkfilename = bname + '-' + str(x + 1) + self.__postfix
                # The last chunk takes whatever has not been written yet.
                if x == self.__numchunks - 1:
                    chunksz = fsize - total_bytes
                try:
                    print('Writing file %s' % chunkfilename)
                    with open(chunkfilename, 'wb') as chunkf:
                        total_bytes += self._copy_bytes(f, chunkf, chunksz)
                except (OSError, IOError) as e:
                    print(e)
                    continue
                except EOFError as e:
                    print(e)
                    break

        print('Done.')

    def sort_index(self, f1, f2):
        """cmp-style comparison of two chunk names by their trailing number.

        The number is whatever follows the last '-' in each name. Returns a
        negative, zero or positive integer when f1's number is smaller than,
        equal to or larger than f2's. Names without a '-' compare as equal.
        """
        index1 = f1.rfind('-')
        index2 = f2.rfind('-')
        if index1 != -1 and index2 != -1:
            return int(f1[index1 + 1:]) - int(f2[index2 + 1:])
        return 0

    def combine(self):
        """Combine existing chunks to recreate the file.

        The chunks must be present in the cwd. The new file will be written
        to the cwd. Chunks are appended in numeric order and streamed to the
        output, so the whole file is never held in memory.

        A chunk that cannot be opened is reported and skipped. Any other I/O
        error while writing the output raises FileSplitterException.
        """
        import re

        print('Creating file %s' % self.__filename)

        bname = os.path.split(self.__filename)[1]
        # re.escape covers every regex metacharacter in the file name; the
        # \Z anchor keeps look-alikes such as 'name-1.bak' out of the list.
        chunkre = re.compile(re.escape(bname) + '-([0-9]+)'
                             + re.escape(self.__postfix) + r'\Z')

        chunks = []
        for f in os.listdir("."):
            print(f)
            found = chunkre.match(f)
            if found:
                chunks.append((int(found.group(1)), f))

        print('Number of chunks %d \n' % len(chunks))
        # Sort on the parsed number so that chunk 10 follows chunk 9.
        chunks.sort()

        try:
            with open(bname, 'wb') as out:
                for number, f in chunks:
                    print('Appending chunk %s' % os.path.join(".", f))
                    try:
                        chunkf = open(f, 'rb')
                    except (OSError, IOError, EOFError) as e:
                        print(e)
                        continue
                    with chunkf:
                        self._copy_bytes(chunkf, out)
        except (OSError, IOError, EOFError) as e:
            raise FileSplitterException(str(e))

        print('Wrote file %s' % bname)
def main():
    """Command-line entry point.

    Prints the usage text and exits when no arguments are given. Otherwise
    parses ``sys.argv[1:]`` and runs the requested split or join. A
    FileSplitterException is reported as a short error message through
    ``sys.exit`` (non-zero exit status) instead of an uncaught traceback.
    """
    if len(sys.argv) < 2:
        sys.exit(usage())

    fsp = FileSplitter()
    fsp.parseOptions(sys.argv[1:])
    try:
        fsp.do_work()
    except FileSplitterException as e:
        sys.exit("Error: %s" % e)
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__=="__main__":
    main()
Discussion:
Often we need to split big files into many chunks
either for saving them to disks, uploading to a web-site
or for some other reason. I used to rely on third-party
programs for this task, but could never find one
handy when needed.
As usual, Python excels at such 'scripting' tasks, and this
script makes the job a breeze. :-)
|
Add comment
|
Number of comments: 5
Refactored..., Anand Pillai, 2003/10/17
# Modified on 18th Oct
I have modified this recipe to print a Usage string,
and it now takes command line options. While combining
it does not need the number of chunks and works with
all the chunks in the current directory.
-Anand
Add comment
Files larger than physical memory on machine, Anthony DiGregorio, 2004/03/24
I made a slight modification to the combine function. See how I append the "data" from each chunk to the file, instead of holding one large "data" object in memory.
Add comment
File size larger than Physical Memory available, Anthony DiGregorio, 2004/03/24
Here is the code:
try:
cmbf = open(bname, 'ab')
for f in chunkfiles:
try:
print 'Appending chunk', os.path.join(".", f)
#data += open(f, 'rb').read()
data = open(f, 'rb').read()
cmbf.write(data)
except (OSError, IOError, EOFError), e:
print e
continue
cmbf.close()
except (OSError, IOError, EOFError), e:
raise FileSplitterException, str(e)
#try:
# f = open(bname, 'wb')
# f.write(data)
# f.close()
#except (OSError, IOError, EOFError), e:
# raise FileSplitterException, str(e)
print 'Wrote file', bname
Add comment
constant filename length, jc Not specified, 2005/02/05
Here is the code to get three-digit numbering for each chunk file (for split mode, near line #107 for me):
chunkfilename = bname + ('-%03d' % (x+1)) + self.__postfix
Add comment
Output to the same directory, jc Not specified, 2005/02/05
Here is the code to generate each file in the same directory as the source (near line #94 for me):
#bname = (os.path.split(self.__filename))[1]
## output to the same directory
bname = self.__filename
Add comment
|
|
|