#!/usr/bin/python
"""Write a '-clean' copy of every zip archive matched on the command line.

Each command-line argument is treated as a glob pattern.  For every matching
file ``<root><ext>`` a sibling ``<root>-clean<ext>`` is written that contains
the same zip members, with two differences:

* the ``mimetype`` member (if any) is emitted first;
* the ``content.xml`` member has ``DATE_PATTERN`` matches replaced by
  ``DATE_REPLACEMENT``.

The member names suggest the inputs are OpenDocument packages -- presumably
the goal is to make documents reproducible by pinning embedded dates; confirm
against the files this is actually run on.
"""
import glob
import os
import re
import sys
import zipfile

# NOTE(review): as written this pattern matches every run of bytes that does
# not contain '<', so *all* such runs in content.xml are replaced, not only
# dates.  The original comment ("Replace all occurrences of date in ...") is
# truncated, which suggests the surrounding XML tags were lost from the
# pattern.  It is kept byte-for-byte here; restore the intended tags before
# relying on the output.
DATE_PATTERN = re.compile(b'[^<]+')
DATE_REPLACEMENT = b'2000-01-01T12:00:00'


def clean_document(filepath):
    """Create the '-clean' copy of the zip archive at *filepath*.

    The output is written next to the input as ``<root>-clean<ext>`` and
    silently overwrites any existing file of that name.

    Returns the path of the file that was written.
    Raises whatever ``zipfile.ZipFile`` raises for unreadable or non-zip
    input (e.g. ``zipfile.BadZipFile``) or an unwritable output path.
    """
    dirname, basename = os.path.split(filepath)
    root, ext = os.path.splitext(basename)
    outpath = os.path.join(dirname, '%s-clean%s' % (root, ext))

    # Context managers guarantee both archives are closed even when reading
    # or writing a member fails part-way through.
    with zipfile.ZipFile(filepath, 'r') as zin:
        with zipfile.ZipFile(outpath, 'w') as zout:
            items = zin.infolist()

            # First pass: 'mimetype' goes in before anything else.
            # Passing the original ZipInfo keeps its stored metadata
            # (timestamp, compression type) for the copied member.
            for item in items:
                if item.filename == 'mimetype':
                    zout.writestr(item, zin.read(item.filename))

            # Second pass: everything else in its original order; only
            # content.xml is rewritten, the rest is copied verbatim.
            for item in items:
                if item.filename == 'mimetype':
                    continue
                data = zin.read(item.filename)
                if item.filename == 'content.xml':
                    data = DATE_PATTERN.sub(DATE_REPLACEMENT, data)
                zout.writestr(item, data)

    return outpath


def main(argv=None):
    """Clean every file matching the glob patterns in *argv*.

    *argv* defaults to ``sys.argv[1:]``.  Patterns that match nothing are
    skipped without error.
    """
    patterns = sys.argv[1:] if argv is None else argv
    for pattern in patterns:
        # glob.glob returns a complete list up front, so the '-clean' files
        # created below are not picked up again within the same pattern.
        for filepath in glob.glob(pattern):
            clean_document(filepath)


if __name__ == '__main__':
    main()