# Source code for msoffcrypto.format.xls97

import logging, io, shutil, tempfile
from struct import pack, unpack
from collections import namedtuple

import olefile

from msoffcrypto import exceptions
from msoffcrypto.format import base
from msoffcrypto.format.common import _parse_encryptionheader, _parse_encryptionverifier
from msoffcrypto.method.rc4 import DocumentRC4
from msoffcrypto.method.rc4_cryptoapi import DocumentRC4CryptoAPI
from msoffcrypto.method.xor_obfuscation import DocumentXOR

# Module-level logger named after this module. A NullHandler is attached so
# that log records emitted here are simply discarded unless the application
# using this library configures logging handlers of its own.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())


# Lookup table: BIFF record name -> numeric record type ID (decimal).
# The ID is the value stored in the first two bytes of each record header in
# the Workbook stream; code in this module compares it against the ``num``
# unpacked from a header (e.g. ``recordNameNum["FilePass"]``, ``recordNameNum["BOF"]``).
# Note that "UserSViewBegin" and "UserSViewBegin_Chart" both map to 426.
recordNameNum = {
    "Formula": 6,
    "EOF": 10,
    "CalcCount": 12,
    "CalcMode": 13,
    "CalcPrecision": 14,
    "CalcRefMode": 15,
    "CalcDelta": 16,
    "CalcIter": 17,
    "Protect": 18,
    "Password": 19,
    "Header": 20,
    "Footer": 21,
    "ExternSheet": 23,
    "Lbl": 24,
    "WinProtect": 25,
    "VerticalPageBreaks": 26,
    "HorizontalPageBreaks": 27,
    "Note": 28,
    "Selection": 29,
    "Date1904": 34,
    "ExternName": 35,
    "LeftMargin": 38,
    "RightMargin": 39,
    "TopMargin": 40,
    "BottomMargin": 41,
    "PrintRowCol": 42,
    "PrintGrid": 43,
    "FilePass": 47,
    "Font": 49,
    "PrintSize": 51,
    "Continue": 60,
    "Window1": 61,
    "Backup": 64,
    "Pane": 65,
    "CodePage": 66,
    "Pls": 77,
    "DCon": 80,
    "DConRef": 81,
    "DConName": 82,
    "DefColWidth": 85,
    "XCT": 89,
    "CRN": 90,
    "FileSharing": 91,
    "WriteAccess": 92,
    "Obj": 93,
    "Uncalced": 94,
    "CalcSaveRecalc": 95,
    "Template": 96,
    "Intl": 97,
    "ObjProtect": 99,
    "ColInfo": 125,
    "Guts": 128,
    "WsBool": 129,
    "GridSet": 130,
    "HCenter": 131,
    "VCenter": 132,
    "BoundSheet8": 133,
    "WriteProtect": 134,
    "Country": 140,
    "HideObj": 141,
    "Sort": 144,
    "Palette": 146,
    "Sync": 151,
    "LPr": 152,
    "DxGCol": 153,
    "FnGroupName": 154,
    "FilterMode": 155,
    "BuiltInFnGroupCount": 156,
    "AutoFilterInfo": 157,
    "AutoFilter": 158,
    "Scl": 160,
    "Setup": 161,
    "ScenMan": 174,
    "SCENARIO": 175,
    "SxView": 176,
    "Sxvd": 177,
    "SXVI": 178,
    "SxIvd": 180,
    "SXLI": 181,
    "SXPI": 182,
    "DocRoute": 184,
    "RecipName": 185,
    "MulRk": 189,
    "MulBlank": 190,
    "Mms": 193,
    "SXDI": 197,
    "SXDB": 198,
    "SXFDB": 199,
    "SXDBB": 200,
    "SXNum": 201,
    "SxBool": 202,
    "SxErr": 203,
    "SXInt": 204,
    "SXString": 205,
    "SXDtr": 206,
    "SxNil": 207,
    "SXTbl": 208,
    "SXTBRGIITM": 209,
    "SxTbpg": 210,
    "ObProj": 211,
    "SXStreamID": 213,
    "DBCell": 215,
    "SXRng": 216,
    "SxIsxoper": 217,
    "BookBool": 218,
    "DbOrParamQry": 220,
    "ScenarioProtect": 221,
    "OleObjectSize": 222,
    "XF": 224,
    "InterfaceHdr": 225,
    "InterfaceEnd": 226,
    "SXVS": 227,
    "MergeCells": 229,
    "BkHim": 233,
    "MsoDrawingGroup": 235,
    "MsoDrawing": 236,
    "MsoDrawingSelection": 237,
    "PhoneticInfo": 239,
    "SxRule": 240,
    "SXEx": 241,
    "SxFilt": 242,
    "SxDXF": 244,
    "SxItm": 245,
    "SxName": 246,
    "SxSelect": 247,
    "SXPair": 248,
    "SxFmla": 249,
    "SxFormat": 251,
    "SST": 252,
    "LabelSst": 253,
    "ExtSST": 255,
    "SXVDEx": 256,
    "SXFormula": 259,
    "SXDBEx": 290,
    "RRDInsDel": 311,
    "RRDHead": 312,
    "RRDChgCell": 315,
    "RRTabId": 317,
    "RRDRenSheet": 318,
    "RRSort": 319,
    "RRDMove": 320,
    "RRFormat": 330,
    "RRAutoFmt": 331,
    "RRInsertSh": 333,
    "RRDMoveBegin": 334,
    "RRDMoveEnd": 335,
    "RRDInsDelBegin": 336,
    "RRDInsDelEnd": 337,
    "RRDConflict": 338,
    "RRDDefName": 339,
    "RRDRstEtxp": 340,
    "LRng": 351,
    "UsesELFs": 352,
    "DSF": 353,
    "CUsr": 401,
    "CbUsr": 402,
    "UsrInfo": 403,
    "UsrExcl": 404,
    "FileLock": 405,
    "RRDInfo": 406,
    "BCUsrs": 407,
    "UsrChk": 408,
    "UserBView": 425,
    "UserSViewBegin": 426,
    "UserSViewBegin_Chart": 426,
    "UserSViewEnd": 427,
    "RRDUserView": 428,
    "Qsi": 429,
    "SupBook": 430,
    "Prot4Rev": 431,
    "CondFmt": 432,
    "CF": 433,
    "DVal": 434,
    "DConBin": 437,
    "TxO": 438,
    "RefreshAll": 439,
    "HLink": 440,
    "Lel": 441,
    "CodeName": 442,
    "SXFDBType": 443,
    "Prot4RevPass": 444,
    "ObNoMacros": 445,
    "Dv": 446,
    "Excel9File": 448,
    "RecalcId": 449,
    "EntExU2": 450,
    "Dimensions": 512,
    "Blank": 513,
    "Number": 515,
    "Label": 516,
    "BoolErr": 517,
    "String": 519,
    "Row": 520,
    "Index": 523,
    "Array": 545,
    "DefaultRowHeight": 549,
    "Table": 566,
    "Window2": 574,
    "RK": 638,
    "Style": 659,
    "BigName": 1048,
    "Format": 1054,
    "ContinueBigName": 1084,
    "ShrFmla": 1212,
    "HLinkTooltip": 2048,
    "WebPub": 2049,
    "QsiSXTag": 2050,
    "DBQueryExt": 2051,
    "ExtString": 2052,
    "TxtQry": 2053,
    "Qsir": 2054,
    "Qsif": 2055,
    "RRDTQSIF": 2056,
    "BOF": 2057,
    "OleDbConn": 2058,
    "WOpt": 2059,
    "SXViewEx": 2060,
    "SXTH": 2061,
    "SXPIEx": 2062,
    "SXVDTEx": 2063,
    "SXViewEx9": 2064,
    "ContinueFrt": 2066,
    "RealTimeData": 2067,
    "ChartFrtInfo": 2128,
    "FrtWrapper": 2129,
    "StartBlock": 2130,
    "EndBlock": 2131,
    "StartObject": 2132,
    "EndObject": 2133,
    "CatLab": 2134,
    "YMult": 2135,
    "SXViewLink": 2136,
    "PivotChartBits": 2137,
    "FrtFontList": 2138,
    "SheetExt": 2146,
    "BookExt": 2147,
    "SXAddl": 2148,
    "CrErr": 2149,
    "HFPicture": 2150,
    "FeatHdr": 2151,
    "Feat": 2152,
    "DataLabExt": 2154,
    "DataLabExtContents": 2155,
    "CellWatch": 2156,
    "FeatHdr11": 2161,
    "Feature11": 2162,
    "DropDownObjIds": 2164,
    "ContinueFrt11": 2165,
    "DConn": 2166,
    "List12": 2167,
    "Feature12": 2168,
    "CondFmt12": 2169,
    "CF12": 2170,
    "CFEx": 2171,
    "XFCRC": 2172,
    "XFExt": 2173,
    "AutoFilter12": 2174,
    "ContinueFrt12": 2175,
    "MDTInfo": 2180,
    "MDXStr": 2181,
    "MDXTuple": 2182,
    "MDXSet": 2183,
    "MDXProp": 2184,
    "MDXKPI": 2185,
    "MDB": 2186,
    "PLV": 2187,
    "Compat12": 2188,
    "DXF": 2189,
    "TableStyles": 2190,
    "TableStyle": 2191,
    "TableStyleElement": 2192,
    "StyleExt": 2194,
    "NamePublish": 2195,
    "NameCmt": 2196,
    "SortData": 2197,
    "Theme": 2198,
    "GUIDTypeLib": 2199,
    "FnGrp12": 2200,
    "NameFnGrp12": 2201,
    "MTRSettings": 2202,
    "CompressPictures": 2203,
    "HeaderFooter": 2204,
    "CrtLayout12": 2205,
    "CrtMlFrt": 2206,
    "CrtMlFrtContinue": 2207,
    "ForceFullCalculation": 2211,
    "ShapePropsStream": 2212,
    "TextPropsStream": 2213,
    "RichTextStream": 2214,
    "CrtLayout12A": 2215,
    "Units": 4097,
    "Chart": 4098,
    "Series": 4099,
    "DataFormat": 4102,
    "LineFormat": 4103,
    "MarkerFormat": 4105,
    "AreaFormat": 4106,
    "PieFormat": 4107,
    "AttachedLabel": 4108,
    "SeriesText": 4109,
    "ChartFormat": 4116,
    "Legend": 4117,
    "SeriesList": 4118,
    "Bar": 4119,
    "Line": 4120,
    "Pie": 4121,
    "Area": 4122,
    "Scatter": 4123,
    "CrtLine": 4124,
    "Axis": 4125,
    "Tick": 4126,
    "ValueRange": 4127,
    "CatSerRange": 4128,
    "AxisLine": 4129,
    "CrtLink": 4130,
    "DefaultText": 4132,
    "Text": 4133,
    "FontX": 4134,
    "ObjectLink": 4135,
    "Frame": 4146,
    "Begin": 4147,
    "End": 4148,
    "PlotArea": 4149,
    "Chart3d": 4154,
    "PicF": 4156,
    "DropBar": 4157,
    "Radar": 4158,
    "Surf": 4159,
    "RadarArea": 4160,
    "AxisParent": 4161,
    "LegendException": 4163,
    "ShtProps": 4164,
    "SerToCrt": 4165,
    "AxesUsed": 4166,
    "SBaseRef": 4168,
    "SerParent": 4170,
    "SerAuxTrend": 4171,
    "IFmtRecord": 4174,
    "Pos": 4175,
    "AlRuns": 4176,
    "BRAI": 4177,
    "SerAuxErrBar": 4187,
    "ClrtClient": 4188,
    "SerFmt": 4189,
    "Chart3DBarShape": 4191,
    "Fbi": 4192,
    "BopPop": 4193,
    "AxcExt": 4194,
    "Dat": 4195,
    "PlotGrowth": 4196,
    "SIIndex": 4197,
    "GelFrame": 4198,
    "BopPopCustom": 4199,
    "Fbi2": 4200,
}


def _parse_header_RC4(encryptionInfo):
    # RC4: https://msdn.microsoft.com/en-us/library/dd908560(v=office.12).aspx
    salt = encryptionInfo.read(16)
    encryptedVerifier = encryptionInfo.read(16)
    encryptedVerifierHash = encryptionInfo.read(16)
    info = {
        "salt": salt,
        "encryptedVerifier": encryptedVerifier,
        "encryptedVerifierHash": encryptedVerifierHash,
    }
    return info


def _parse_header_RC4CryptoAPI(encryptionInfo):
    """Read the RC4 CryptoAPI encryption header and verifier from a stream.

    Layout consumed from ``encryptionInfo``: 4 bytes of flags (read but not
    interpreted here), a 4-byte little-endian header size, that many bytes of
    encryption header, and then everything remaining as the verifier.

    Args:
        encryptionInfo: Readable binary stream positioned at the flags field.

    Returns:
        dict with ``"salt"``, ``"encryptedVerifier"`` and
        ``"encryptedVerifierHash"`` taken from the parsed verifier, and
        ``"keySize"`` taken from the parsed header.
    """
    flags = encryptionInfo.read(4)  # consumed only to advance the stream
    headerSize = unpack("<I", encryptionInfo.read(4))[0]
    logger.debug(headerSize)

    header = _parse_encryptionheader(io.BytesIO(encryptionInfo.read(headerSize)))
    logger.debug(header)

    # Whatever follows the header is handed to the verifier parser.
    remainder = io.BytesIO(encryptionInfo.read())
    verifier = _parse_encryptionverifier(remainder, "RC4")  # TODO: Fix (cf. ooxml.py)
    logger.debug(verifier)

    return {
        "salt": verifier["salt"],
        "keySize": header["keySize"],
        "encryptedVerifier": verifier["encryptedVerifier"],
        "encryptedVerifierHash": verifier["encryptedVerifierHash"],
    }


class _BIFFStream:
    def __init__(self, data):
        self.data = data

    def has_record(self, target):
        pos = self.data.tell()
        while True:
            h = self.data.read(4)
            if not h:
                self.data.seek(pos)
                return False
            num, size = unpack("<HH", h)
            if num == target:
                self.data.seek(pos)
                return True
            else:
                self.data.read(size)

    def skip_to(self, target):
        while True:
            h = self.data.read(4)
            if not h:
                raise exceptions.ParseError("Record not found")
            num, size = unpack("<HH", h)
            if num == target:
                return num, size
            else:
                self.data.read(size)

    def iter_record(self):
        while True:
            h = self.data.read(4)
            if not h:
                break
            num, size = unpack("<HH", h)
            record = io.BytesIO(self.data.read(size))
            yield num, size, record


class Xls97File(base.BaseOfficeFile):
    """Return a MS-XLS file object.

    Examples:
        >>> with open("tests/inputs/rc4cryptoapi_password.xls", "rb") as f:
        ...     officefile = Xls97File(f)
        ...     officefile.load_key(password="Password1234_")

        >>> with open("tests/inputs/xor_password_123456789012345.xls", "rb") as f:
        ...     officefile = Xls97File(f)
        ...     officefile.load_key(password="123456789012345")

        >>> with open("tests/inputs/rc4cryptoapi_password.xls", "rb") as f:
        ...     officefile = Xls97File(f)
        ...     officefile.load_key(password="0000")
        Traceback (most recent call last):
            ...
        msoffcrypto.exceptions.InvalidKeyError: ...
    """

    def __init__(self, file):
        """Open ``file`` as an OLE container and grab its "Workbook" stream.

        Args:
            file: Seekable binary file object containing the .xls document.
        """
        self.file = file
        ole = olefile.OleFileIO(file)  # do not close this, would close file
        self.ole = ole
        self.format = "xls97"
        self.keyTypes = ["password"]
        self.key = None
        self.salt = None
        # Encryption scheme ("xor", "rc4" or "rc4_cryptoapi"); set by load_key().
        self.type = None

        workbook = ole.openstream("Workbook")  # closed in destructor

        Data = namedtuple("Data", ["workbook"])
        self.data = Data(
            workbook=workbook,
        )

    def __del__(self):
        """Destructor, closes opened stream."""
        if hasattr(self, "data") and self.data and self.data.workbook:
            self.data.workbook.close()

    def _workbook_past_bof(self):
        """Rewind the Workbook stream and return a reader positioned just after the BOF record.

        Raises:
            exceptions.ParseError: If the stream does not start with a
                complete BOF record header.
        """
        self.data.workbook.seek(0)
        workbook = _BIFFStream(self.data.workbook)

        # Each record starts with a 2-byte record number followed by a 2-byte
        # payload size. The workbook stream's structure is
        # WORKBOOK = BOF WORKBOOKCONTENT and so forth, as in
        # https://msdn.microsoft.com/en-us/library/dd952177(v=office.12).aspx
        header = workbook.data.read(4)
        if len(header) < 4:
            raise exceptions.ParseError("Invalid BOF record")
        num, size = unpack("<HH", header)
        # Explicit check instead of ``assert`` so that validation survives ``python -O``.
        if num != recordNameNum["BOF"]:
            raise exceptions.ParseError("Invalid BOF record")
        workbook.data.read(size)  # Skip BOF
        return workbook

    def load_key(self, password=None):
        """Verify ``password`` against the FilePass record and remember it for decrypt().

        On success ``self.type`` is set to ``"xor"``, ``"rc4"`` or
        ``"rc4_cryptoapi"`` and ``self.key`` to the password; the RC4 variants
        also store ``self.salt`` (and ``self.keySize`` for CryptoAPI).

        Args:
            password: Password to verify.

        Raises:
            exceptions.ParseError: If the stream has no valid BOF record or
                no FilePass record.
            exceptions.InvalidKeyError: If the password fails verification.
            exceptions.DecryptionError: If the encryption type or version in
                the FilePass record is not one handled here.
        """
        # Record IDs (in decimal) are listed here:
        # https://msdn.microsoft.com/en-us/library/dd945945(v=office.12).aspx
        workbook = self._workbook_past_bof()

        # Skip to FilePass; TODO: Raise exception if not encrypted
        num, size = workbook.skip_to(recordNameNum["FilePass"])

        # FilePass: https://msdn.microsoft.com/en-us/library/dd952596(v=office.12).aspx
        # If this record exists, the workbook MUST be encrypted.
        (wEncryptionType,) = unpack("<H", workbook.data.read(2))
        encryptionInfo = io.BytesIO(workbook.data.read(size - 2))

        if wEncryptionType == 0x0000:  # XOR obfuscation
            # Only the verification bytes are needed; the stored key is not used here.
            _key, verificationBytes = unpack("<HH", encryptionInfo.read(4))
            if not DocumentXOR.verifypw(password, verificationBytes):
                raise exceptions.InvalidKeyError("Failed to verify password")
            self.type = "xor"
            self.key = password
        elif wEncryptionType == 0x0001:  # RC4
            encryptionVersionInfo = encryptionInfo.read(4)
            vMajor, vMinor = unpack("<HH", encryptionVersionInfo)
            logger.debug("Version: {} {}".format(vMajor, vMinor))
            if vMajor == 0x0001 and vMinor == 0x0001:  # RC4
                info = _parse_header_RC4(encryptionInfo)
                if not DocumentRC4.verifypw(
                    password, info["salt"], info["encryptedVerifier"], info["encryptedVerifierHash"]
                ):
                    raise exceptions.InvalidKeyError("Failed to verify password")
                self.type = "rc4"
                self.key = password
                self.salt = info["salt"]
            elif vMajor in [0x0002, 0x0003, 0x0004] and vMinor == 0x0002:  # RC4 CryptoAPI
                info = _parse_header_RC4CryptoAPI(encryptionInfo)
                if not DocumentRC4CryptoAPI.verifypw(
                    password, info["salt"], info["keySize"], info["encryptedVerifier"], info["encryptedVerifierHash"]
                ):
                    raise exceptions.InvalidKeyError("Failed to verify password")
                self.type = "rc4_cryptoapi"
                self.key = password
                self.salt = info["salt"]
                self.keySize = info["keySize"]
            else:
                raise exceptions.DecryptionError("Unsupported encryption method")
        else:
            # Unknown wEncryptionType: fail loudly rather than leave the object
            # without a usable key.
            raise exceptions.DecryptionError("Unsupported encryption method")

    def decrypt(self, ofile):
        """Write a decrypted copy of the document to ``ofile``.

        The Workbook stream is rebuilt record by record and written into a
        temporary copy of the original OLE file, which is then copied to
        ``ofile``. The stream size is preserved: the FilePass record is
        zero-filled rather than removed.

        Args:
            ofile: Writable binary file object receiving the result.

        Raises:
            exceptions.DecryptionError: If no supported key has been loaded
                with load_key().
        """
        if self.type not in ("rc4", "rc4_cryptoapi", "xor"):
            raise exceptions.DecryptionError("Key is not loaded; call load_key() before decrypt()")

        # List of encrypted parts: https://msdn.microsoft.com/en-us/library/dd905723(v=office.12).aspx

        # Workbook stream
        self.data.workbook.seek(0)
        workbook = _BIFFStream(self.data.workbook)

        filepass_num = recordNameNum["FilePass"]
        boundsheet8_num = recordNameNum["BoundSheet8"]
        # The following records MUST NOT be obfuscated or encrypted: BOF (section 2.4.21),
        # FilePass (section 2.4.117), UsrExcl (section 2.4.339), FileLock (section 2.4.116),
        # InterfaceHdr (section 2.4.146), RRDInfo (section 2.4.227), and RRDHead (section 2.4.226).
        # FilePass gets its own branch below, so it is not repeated in this set.
        never_encrypted = frozenset(
            recordNameNum[name] for name in ("BOF", "UsrExcl", "FileLock", "InterfaceHdr", "RRDInfo", "RRDHead")
        )

        # plain_buf holds one entry per output byte: the literal byte value for
        # bytes kept in the clear, or a negative marker (-1 / -2) where the
        # decrypted byte must be kept. encrypted_buf is the same-length stream
        # fed to the cipher, with zeroes wherever plaintext is already known.
        plain_buf = []
        encrypted_buf = io.BytesIO()
        record_info = []
        for num, size, record in workbook.iter_record():
            if num == filepass_num:
                # Remove encryption, pad by zero to preserve stream size
                plain_buf += [0, 0] + list(pack("<H", size)) + [0] * size
                encrypted_buf.write(b"\x00" * (4 + size))
            elif num in never_encrypted:
                header = pack("<HH", num, size)
                plain_buf += list(header) + list(record.read())
                encrypted_buf.write(b"\x00" * (4 + size))
            elif num == boundsheet8_num:
                # The lbPlyPos field of the BoundSheet8 record (section 2.4.28) MUST NOT be encrypted.
                header = pack("<HH", num, size)
                plain_buf += list(header) + list(record.read(4)) + [-2] * (size - 4)  # Preserve lbPlyPos
                encrypted_buf.write(b"\x00" * 4 + b"\x00" * 4 + record.read())
            else:
                header = pack("<HH", num, size)
                plain_buf += list(header) + [-1] * size
                encrypted_buf.write(b"\x00" * 4 + record.read())

        self.data_size = encrypted_buf.tell()
        encrypted_buf.seek(0)

        if self.type == "rc4":
            dec = DocumentRC4.decrypt(self.key, self.salt, encrypted_buf, blocksize=1024)
        elif self.type == "rc4_cryptoapi":
            dec = DocumentRC4CryptoAPI.decrypt(self.key, self.salt, self.keySize, encrypted_buf, blocksize=1024)
        else:  # "xor" (guaranteed by the check at the top)
            dec = DocumentXOR.decrypt(self.key, encrypted_buf, plain_buf, record_info, 10)

        # Overlay the known-plaintext bytes onto the decrypted stream, stepping
        # over the positions that must keep their decrypted value.
        for c in plain_buf:
            if c in (-1, -2):
                dec.seek(1, 1)
            else:
                dec.write(bytearray([c]))

        dec.seek(0)
        workbook_dec = dec

        with tempfile.TemporaryFile() as _ofile:
            self.file.seek(0)
            shutil.copyfileobj(self.file, _ofile)
            # Not closed explicitly: closing the OleFileIO may close _ofile,
            # which is still needed below.
            outole = olefile.OleFileIO(_ofile, write_mode=True)

            outole.write_stream("Workbook", workbook_dec.read())

            _ofile.seek(0)
            shutil.copyfileobj(_ofile, ofile)

        return

    def is_encrypted(self):
        r"""
        Test if the file is encrypted.

            >>> f = open("tests/inputs/plain.xls", "rb")
            >>> file = Xls97File(f)
            >>> file.is_encrypted()
            False
            >>> f = open("tests/inputs/rc4cryptoapi_password.xls", "rb")
            >>> file = Xls97File(f)
            >>> file.is_encrypted()
            True

        Returns:
            True when a FilePass record is present and its encryption type is
            XOR obfuscation (0x0000) or RC4 (0x0001); False otherwise.

        Raises:
            exceptions.ParseError: If the stream has no valid BOF record.
        """
        workbook = self._workbook_past_bof()

        if not workbook.has_record(recordNameNum["FilePass"]):
            return False

        num, size = workbook.skip_to(recordNameNum["FilePass"])
        (wEncryptionType,) = unpack("<H", workbook.data.read(2))

        # 0x0000: XOR obfuscation, 0x0001: RC4
        return wEncryptionType in (0x0000, 0x0001)