1
0
mirror of https://github.com/pbnjay/grate.git synced 2025-01-22 13:43:21 +02:00

working biff8 parser and rc4 decryption

This commit is contained in:
Jeremy Jay 2021-02-05 11:07:57 -05:00
parent a477a30993
commit b17e0da28d
7 changed files with 1871 additions and 0 deletions

95
xls/crypto/crypto.go Normal file
View File

@ -0,0 +1,95 @@
package crypto
import (
"bytes"
"encoding/binary"
"fmt"
)
// Decryptor describes methods to decrypt an excel sheet.
//
// Encrypted bytes are supplied through Write and the decrypted result is
// retrieved through Read; Flush forces the pending block to be decrypted.
type Decryptor interface {
// SetPassword sets the password used for the decryption.
SetPassword(password []byte)
// Read implements the io.Reader interface and yields decrypted bytes.
Read(p []byte) (n int, err error)
// Write implements the io.Writer interface and accepts encrypted bytes.
Write(p []byte) (n int, err error)
// Flush tells the decryptor to decrypt the latest block.
Flush()
// Reset the decryptor, and clear all written and readable data.
Reset()
}
// Algorithms designed based on specs in MS-OFFCRYPTO:
// https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/3c34d72a-1a61-4b52-a893-196f9157f083
// Important notes from MS-XLS section 2.2.10:
// https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/cd03cb5f-ca02-4934-a391-bb674cb8aa06
// When obfuscating or encrypting BIFF records in these streams the record type and
// record size components MUST NOT be obfuscated or encrypted.
// In addition the following records MUST NOT be obfuscated or encrypted:
// BOF (section 2.4.21), FilePass (section 2.4.117), UsrExcl (section 2.4.339),
// FileLock (section 2.4.116), InterfaceHdr (section 2.4.146), RRDInfo (section 2.4.227),
// and RRDHead (section 2.4.226). Additionally, the lbPlyPos field of the BoundSheet8
// record (section 2.4.28) MUST NOT be encrypted.
// For RC4 encryption and RC4 CryptoAPI encryption, the Unicode password string is used
// to generate the encryption key as specified in [MS-OFFCRYPTO] section 2.3.6.2 or
// [MS-OFFCRYPTO] section 2.3.5.2 depending on the RC4 algorithm used. The record data
// is then encrypted by the specific RC4 algorithm in 1024-byte blocks. The block number
// is set to zero at the beginning of every BIFF record stream, and incremented by one
// at each 1024-byte boundary. Bytes to be encrypted are passed into the RC4 encryption
// function and then written to the stream. For unencrypted records and the record
// headers consisting of the record type and record size, a byte buffer of all zeros,
// of the same size as the section of unencrypted bytes, is passed into the RC4
// encryption function. The results are then ignored and the unencrypted bytes are
// written to the stream.
// DefaultXLSPassword is the password used when a workbook is encrypted without
// an explicit user password. It is defined by MS-XLS section 2.4.191, note <100>:
// if the value of the wPassword field of the Password record in the Globals
// Substream is not 0x0000, Excel 97, Excel 2000, Excel 2002, Office Excel 2003,
// Office Excel 2007, and Excel 2010 encrypt the document as specified in
// [MS-OFFCRYPTO], section 2.3. If an encryption password is not specified or the
// workbook or sheet is only protected, the document is encrypted with this
// default password.
var DefaultXLSPassword = "VelvetSweatshop"
/////////////
// basicRC4Encryption is the fixed-size RC4 encryption header described in
// [MS-OFFCRYPTO] section 2.3.6.1. NewBasicRC4 fills it with binary.Read using
// little-endian byte order, so the field order and sizes below define the
// expected layout and must not be rearranged.
type basicRC4Encryption struct {
MajorVersion uint16
MinorVersion uint16 // NewBasicRC4 only accepts the value 1
Salt [16]byte // salt mixed into the key derivation
Verifier [16]byte // encrypted verifier; decrypted and hashed by Verify
VerifierHash [16]byte // encrypted MD5 of the verifier; compared by Verify
}
// NewBasicRC4 implements the standard RC4 decryption.
//
// data is the RC4 encryption header (version, salt, encrypted verifier and
// encrypted verifier hash). The header is parsed first and its minor version is
// checked before its length, so headers of another RC4 flavour are reported
// with their version rather than as a bare size mismatch.
//
// The returned error is the result of verifying the header against the
// currently configured password (the default password when none was set); the
// decryptor is returned alongside it.
func NewBasicRC4(data []byte) (Decryptor, error) {
	var hdr basicRC4Encryption
	if err := binary.Read(bytes.NewReader(data), binary.LittleEndian, &hdr); err != nil {
		return nil, err
	}

	switch {
	case hdr.MinorVersion != 1:
		return nil, fmt.Errorf("xls: unknown basic-RC4 minor version %d (%d byte record)",
			hdr.MinorVersion, len(data))
	case len(data) != 52:
		return nil, fmt.Errorf("xls: data length is invalid (expected 52 bytes, got %d)",
			len(data))
	}

	// Keep a private copy of the salt so it does not alias the parsed header.
	w := &rc4Writer{Salt: append([]byte(nil), hdr.Salt[:]...)}
	return w, w.Verify(hdr.Verifier[:], hdr.VerifierHash[:])
}

149
xls/crypto/rc4.go Normal file
View File

@ -0,0 +1,149 @@
package crypto
import (
"bytes"
"crypto/md5"
"crypto/rc4"
"encoding/binary"
"fmt"
)
// Compile-time check that *rc4Writer satisfies Decryptor.
var _ Decryptor = (*rc4Writer)(nil)
// Write stages encrypted bytes in the current 1024-byte block and flushes the
// block every time it fills up. It always consumes all of data and never
// returns an error.
func (d *rc4Writer) Write(data []byte) (n int, err error) {
	total := len(data)
	for rest := data; len(rest) > 0; {
		copied := copy(d.bytes[d.offset:], rest)
		rest = rest[copied:]
		d.offset += copied

		switch {
		case d.offset == 1024:
			// block is complete: decrypt it and start the next one
			d.Flush()
		case d.offset > 1024:
			panic("invalid offset from write")
		}
	}
	return total, nil
}
// Read copies already-decrypted bytes out of the output buffer into data.
func (d *rc4Writer) Read(data []byte) (n int, err error) {
	n, err = d.buf.Read(data)
	return n, err
}
// Reset rewinds the decryptor to block 0 and drops both the partially filled
// block and any decrypted data that has not been read yet.
func (d *rc4Writer) Reset() {
	d.buf.Reset()
	d.block, d.offset = 0, 0
}
// Flush tells the decryptor to decrypt the latest block.
//
// The unused tail of the block is zero-filled first, so a full 1024 bytes are
// always decrypted and appended to the output buffer, after which the block
// counter advances.
func (d *rc4Writer) Flush() {
	switch {
	case d.offset > len(d.bytes):
		panic("invalid offset fill")
	case d.offset < len(d.bytes):
		pad := d.bytes[d.offset:]
		for i := range pad {
			pad[i] = 0
		}
		d.offset = len(d.bytes)
	}

	// decrypt in place with the key for this block, then publish the result
	d.startBlock()
	block := d.bytes[:]
	d.dec.XORKeyStream(block, block)
	d.buf.Write(block)

	d.block++
	d.offset = 0
}
// SetPassword sets the password for the decryption and derives the
// password-dependent part of the encryption key from it and the salt.
//
// password is interpreted as UTF-8 text, so non-ASCII passwords are converted
// to the Unicode code points the key derivation expects (widening each byte
// individually would treat the UTF-8 bytes as Latin-1 and produce a different
// key). Pure ASCII passwords are unaffected.
//
// The key derivation panics when the password is empty or the salt is not
// 16 bytes long.
func (d *rc4Writer) SetPassword(password []byte) {
	d.Password = []rune(string(password))

	// compute the first part of the encryption key; the per-block part is
	// added in startBlock
	d.encKey = generateStd97Key(d.Password, d.Salt)
}
// rc4Writer is the Decryptor used for basic RC4 encryption. Bytes passed to
// Write are gathered into 1024-byte blocks; each block is decrypted with its
// own key (see startBlock) and appended to buf, from which Read serves them.
type rc4Writer struct {
block uint32 // number of the block currently being filled
offset int // count of bytes of the current block already stored in bytes
bytes [1024]byte // staging area for the current block
// records the decrypted data
buf bytes.Buffer
///////
// decrypter for RC4 content streams
dec *rc4.Cipher
cipherKey []byte // first 5 bytes of encKey followed by the block number; its first 9 bytes are hashed into the RC4 key by startBlock
encKey []byte // password/salt digest returned by generateStd97Key (H1 per 2.3.6.2)
Salt []byte // salt taken from the encryption header
Password []rune // password code points set by SetPassword
}
// Verify checks the current password against the encrypted verifier pair from
// the encryption header. It resets the decryptor, decrypts both values with the
// block 0 key and reports an error unless the MD5 digest of the decrypted
// verifier equals the decrypted verifier hash.
func (d *rc4Writer) Verify(everifier, everifierHash []byte) error {
	d.Reset()
	d.startBlock()

	// both values must be decrypted with the same cipher, in this order
	var verifier, wantHash [md5.Size]byte
	d.dec.XORKeyStream(verifier[:], everifier)
	d.dec.XORKeyStream(wantHash[:], everifierHash)

	if md5.Sum(verifier[:]) != wantHash {
		return fmt.Errorf("verification failed")
	}
	return nil
}
/////////////////////
// startBlock prepares the RC4 cipher for the current block. The key is the MD5
// digest of the first 5 bytes of encKey followed by the little-endian block
// number. When no password has been set yet, the default password is used.
func (d *rc4Writer) startBlock() {
	if d.encKey == nil {
		d.SetPassword([]byte(DefaultXLSPassword))
	}

	blk := d.block
	seed := append(make([]byte, 0, 16), d.encKey[:5]...)
	seed = append(seed, byte(blk), byte(blk>>8), byte(blk>>16), byte(blk>>24))
	digest := md5.Sum(seed)

	// cipherKey keeps its historical 16-byte length; bytes 9..15 stay zero
	d.cipherKey = seed[:16]
	d.dec, _ = rc4.NewCipher(digest[:])
}
// generateStd97Key derives the password-dependent key material for basic RC4
// encryption ([MS-OFFCRYPTO] section 2.3.6.2):
//
//	H0 = MD5(password encoded as UTF-16LE)
//	H1 = MD5((H0[:5] + salt) repeated 16 times)
//
// It returns the 16-byte H1. Code points above U+FFFF are written as UTF-16
// surrogate pairs instead of being truncated to their low 16 bits.
//
// It panics when passData is empty or salt is not exactly 16 bytes long.
func generateStd97Key(passData []rune, salt []byte) []byte {
	if len(passData) == 0 || len(salt) != 16 {
		panic("invalid keygen material")
	}

	passBytes := make([]byte, 0, len(passData)*2)
	var unit [2]byte
	for _, c := range passData {
		if c >= 0x10000 && c <= 0x10FFFF {
			// supplementary plane: emit the high surrogate here and let the
			// common path below emit the low surrogate
			c -= 0x10000
			binary.LittleEndian.PutUint16(unit[:], uint16(0xD800+(c>>10)))
			passBytes = append(passBytes, unit[:]...)
			c = 0xDC00 + (c & 0x3FF)
		}
		binary.LittleEndian.PutUint16(unit[:], uint16(c))
		passBytes = append(passBytes, unit[:]...)
	}

	// H0: digest of the encoded password
	h0 := md5.Sum(passBytes)

	// H1: digest of 16 repetitions of the truncated H0 followed by the salt
	msum := md5.New()
	for i := 0; i < 16; i++ {
		msum.Write(h0[:5])
		msum.Write(salt)
	}
	return msum.Sum(make([]byte, 0, md5.Size))
}

1076
xls/records.go Normal file

File diff suppressed because it is too large Load Diff

40
xls/simple_test.go Normal file
View File

@ -0,0 +1,40 @@
package xls
import (
"context"
"log"
"testing"
)
// TestHeader checks that testdata/test.xls opens and loads without error.
func TestHeader(t *testing.T) {
	const path = "testdata/test.xls"
	book, openErr := Open(context.Background(), path)
	if openErr != nil {
		t.Fatal(openErr)
	}
	log.Println(book.filename)
}
// TestHeader2 checks that testdata/test2.xls opens and loads without error.
func TestHeader2(t *testing.T) {
	const path = "testdata/test2.xls"
	book, openErr := Open(context.Background(), path)
	if openErr != nil {
		t.Fatal(openErr)
	}
	log.Println(book.filename)
}
// TestHeader3 checks that testdata/test3.xls opens and loads without error.
func TestHeader3(t *testing.T) {
	const path = "testdata/test3.xls"
	book, openErr := Open(context.Background(), path)
	if openErr != nil {
		t.Fatal(openErr)
	}
	log.Println(book.filename)
}
// TestHeader4 checks that testdata/test4.xls opens and loads without error.
func TestHeader4(t *testing.T) {
	const path = "testdata/test4.xls"
	book, openErr := Open(context.Background(), path)
	if openErr != nil {
		t.Fatal(openErr)
	}
	log.Println(book.filename)
}

233
xls/strings.go Normal file
View File

@ -0,0 +1,233 @@
package xls
import (
"encoding/binary"
"io"
"io/ioutil"
"log"
"unicode/utf16"
)
// decodeShortXLUnicodeString reads a ShortXLUnicodeString (MS-XLS 2.5.240)
// from r: a one-byte character count, a one-byte flags field and the
// characters themselves. Bit 0 of the flags selects 16-bit little-endian
// characters; otherwise each character is stored as a single byte holding the
// low 8 bits of the code unit.
//
// A failure to read the two header bytes is returned as an error. If the
// character data is cut short, the whole characters that were read are
// returned without padding (matching decodeXLUnicodeRichExtendedString);
// any reader error other than end-of-input is returned to the caller.
func decodeShortXLUnicodeString(r io.Reader) (string, error) {
	var cch, flags uint8
	if err := binary.Read(r, binary.LittleEndian, &cch); err != nil {
		return "", err
	}
	if err := binary.Read(r, binary.LittleEndian, &flags); err != nil {
		return "", err
	}

	width := 1 // bytes per character
	if flags&0x1 != 0 {
		width = 2
	}

	raw := make([]byte, int(cch)*width)
	n, err := io.ReadFull(r, raw)
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return "", err
	}
	// on a short read keep only complete characters
	raw = raw[:n-n%width]

	content := make([]uint16, len(raw)/width)
	for i := range content {
		if width == 1 {
			content[i] = uint16(raw[i])
		} else {
			content[i] = binary.LittleEndian.Uint16(raw[2*i:])
		}
	}
	return string(utf16.Decode(content)), nil
}
// decodeXLUnicodeRichExtendedString reads an XLUnicodeRichExtendedString
// (MS-XLS 2.5.293) from r and returns its text. The layout consumed is:
//
//	cch      uint16  character count
//	flags    uint8   bit 0: 16-bit characters, bit 2: phonetic data, bit 3: formatting runs
//	cRun     uint16  number of formatting runs (only when bit 3 is set)
//	cbExtRs  int32   size of the phonetic data (only when bit 2 is set)
//	characters, then cRun*4 bytes of runs, then cbExtRs bytes of phonetic data
//
// Formatting runs and phonetic data are skipped. If the character data is cut
// short, the complete characters that were read are kept for both the 8-bit
// and the 16-bit encoding; a reader error other than end-of-input is returned.
// Progress and failures are reported through the log package.
func decodeXLUnicodeRichExtendedString(r io.Reader) (string, error) {
	var cch, cRun uint16
	var flags uint8
	var cbExtRs int32

	err := binary.Read(r, binary.LittleEndian, &cch)
	if err != nil {
		log.Println("x1", err)
		return "", err
	}
	err = binary.Read(r, binary.LittleEndian, &flags)
	if err != nil {
		log.Println("x2", err)
		return "", err
	}
	if (flags & 0x8) != 0 {
		log.Println("FORMATTING PRESENT")
		// rich formatting data is present
		err = binary.Read(r, binary.LittleEndian, &cRun)
		if err != nil {
			log.Println("x3", err)
			return "", err
		}
	}
	if (flags & 0x4) != 0 {
		log.Println("PHONETIC PRESENT")
		// phonetic string data is present
		err = binary.Read(r, binary.LittleEndian, &cbExtRs)
		if err != nil {
			log.Println("x4", err)
			return "", err
		}
	}

	width := 1 // bytes per character
	if (flags & 0x1) == 0 {
		// 16-bit characters but only the bottom 8 bits are stored
		log.Println("8BIT DATA", cch)
	} else {
		log.Println("16BIT DATA", cch)
		width = 2
	}

	raw := make([]byte, int(cch)*width)
	n, err := io.ReadFull(r, raw)
	if err != nil {
		log.Println("x5", err)
		if err != io.EOF && err != io.ErrUnexpectedEOF {
			return "", err
		}
	}
	// on a short read keep only complete characters
	raw = raw[:n-n%width]
	content := make([]uint16, len(raw)/width)
	for i := range content {
		if width == 1 {
			content[i] = uint16(raw[i])
		} else {
			content[i] = binary.LittleEndian.Uint16(raw[2*i:])
		}
	}

	if cRun > 0 {
		log.Println("READING FORMATTING DATA")
		// each formatting run occupies 4 bytes
		_, err = io.CopyN(ioutil.Discard, r, int64(cRun)*4)
		if err != nil {
			log.Println("x6", err)
			return "", err
		}
	}
	if cbExtRs > 0 {
		log.Println("READING PHONETIC DATA")
		skipped, err := io.CopyN(ioutil.Discard, r, int64(cbExtRs))
		if err != nil {
			log.Println("x7", skipped, cbExtRs, err)
			return "", err
		}
	}

	return string(utf16.Decode(content)), nil
}
// parseSST reads in an array of XLUnicodeRichExtendedString values from a
// shared string table. recs[0] is the SST record and any further entries are
// the Continue records that carry the rest of its payload.
//
// The first 8 bytes of the SST hold the total reference count and the number
// of unique strings; the strings follow back to back. Each string has a header
// (character count, flags, optional formatting-run count, optional phonetic
// data size), then its characters, then the formatting runs and phonetic data,
// both of which are skipped.
//
// Continuation rules applied here:
//   - when the characters of a string spill into the next record, that record
//     starts with a one-byte flags field whose bit 0 selects the character
//     width for the remainder of the string;
//   - when formatting runs or phonetic data spill into the next record, no
//     such flags byte is present.
//
// Eight-bit characters hold the low byte of the UTF-16 code unit and are
// widened before decoding. Truncated input yields io.ErrUnexpectedEOF instead
// of a panic.
func parseSST(recs []*rec) ([]string, error) {
	if len(recs) == 0 || len(recs[0].Data) < 8 {
		return nil, io.ErrUnexpectedEOF
	}
	totalRefs := binary.LittleEndian.Uint32(recs[0].Data[0:4])
	numStrings := binary.LittleEndian.Uint32(recs[0].Data[4:8])
	// cell count limit is 65k x 256
	if numStrings > 65536*256 {
		log.Println("INVALID COUNTS total=", totalRefs, " -- n strings=", numStrings)
		totalRefs = 0
		numStrings = 65536 * 256
	}
	log.Println("total=", totalRefs, " -- n strings=", numStrings)

	// Every string needs at least a 3-byte header, so the payload size bounds
	// how many strings can really be present; the declared count is only a
	// hint and must not drive a huge allocation.
	avail := 0
	for _, r := range recs {
		avail += len(r.Data)
	}
	capHint := int(numStrings)
	if capHint > avail/3 {
		capHint = avail / 3
	}
	all := make([]string, 0, capHint)

	i := 0
	buf := recs[0].Data[8:]

	// advance switches buf to the payload of the next record and reports
	// whether there was one.
	advance := func() bool {
		i++
		if i >= len(recs) {
			buf = nil
			return false
		}
		buf = recs[i].Data
		return true
	}
	// skip discards n payload bytes, crossing record boundaries as needed.
	skip := func(n int64) error {
		for n > 0 {
			if len(buf) == 0 {
				if !advance() {
					return io.ErrUnexpectedEOF
				}
				continue
			}
			k := int64(len(buf))
			if k > n {
				k = n
			}
			buf = buf[k:]
			n -= k
		}
		return nil
	}

	for {
		if len(buf) == 0 {
			// a new string starts at the top of the next record, which
			// therefore has no leading flags byte
			if !advance() {
				break
			}
			continue
		}

		// string header
		if len(buf) < 3 {
			return nil, io.ErrUnexpectedEOF
		}
		cch := int(binary.LittleEndian.Uint16(buf))
		flags := buf[2]
		buf = buf[3:]

		var cRunBytes, cbExtRs int64
		if (flags & 0x8) != 0 {
			// rich formatting data is present: 4 bytes per run
			if len(buf) < 2 {
				return nil, io.ErrUnexpectedEOF
			}
			cRunBytes = int64(binary.LittleEndian.Uint16(buf)) * 4
			buf = buf[2:]
		}
		if (flags & 0x4) != 0 {
			// phonetic string data is present
			if len(buf) < 4 {
				return nil, io.ErrUnexpectedEOF
			}
			cbExtRs = int64(binary.LittleEndian.Uint32(buf))
			buf = buf[4:]
		}

		// character data, transparently continuing across records
		wide := (flags & 0x1) != 0
		units := make([]uint16, 0, cch)
		for len(units) < cch {
			if len(buf) == 0 {
				if !advance() || len(buf) == 0 {
					return nil, io.ErrUnexpectedEOF
				}
				// the continued record restates the character width
				wide = (buf[0] & 0x1) != 0
				buf = buf[1:]
				continue
			}
			if !wide {
				units = append(units, uint16(buf[0]))
				buf = buf[1:]
				continue
			}
			if len(buf) < 2 {
				return nil, io.ErrUnexpectedEOF
			}
			units = append(units, binary.LittleEndian.Uint16(buf))
			buf = buf[2:]
		}
		all = append(all, string(utf16.Decode(units)))

		if err := skip(cRunBytes); err != nil {
			return nil, err
		}
		if err := skip(cbExtRs); err != nil {
			return nil, err
		}
	}
	return all, nil
}

23
xls/structs.go Normal file
View File

@ -0,0 +1,23 @@
package xls
// header holds the fields decoded from the body of a BOF record.
// loadFromStream fills it with binary.Read in little-endian order, so the
// field order and sizes below define the expected 16-byte layout.
type header struct {
Version uint16 // An unsigned integer that specifies the BIFF version of the file. The value MUST be 0x0600.
DocType uint16 // An unsigned integer that specifies the document type of the substream of records following this record. For more information about the layout of the sub-streams in the workbook stream see File Structure.
RupBuild uint16 // An unsigned integer that specifies the build identifier.
RupYear uint16 // An unsigned integer that specifies the year when this BIFF version was first created. The value MUST be 0x07CC or 0x07CD.
MiscBits uint64 // lots of miscellaneous bits and flags we're not going to check
}
// rec is a single BIFF record as described in MS-XLS section 2.1.4: a record
// type, the size of the payload and the payload itself.
type rec struct {
RecType recordType // record type identifier read from the record header
RecSize uint16 // must be between 0 and 8224
Data []byte // record payload; len(rec.Data) = rec.RecSize
}
// boundSheet holds the fields decoded from a BoundSheet8 record, which
// describes one sheet of the workbook.
type boundSheet struct {
Position uint32 // A FilePointer as specified in [MS-OSHARED] section 2.2.1.5 that specifies the stream position of the start of the BOF record for the sheet.
HiddenState byte // (2 bits) An unsigned integer that specifies the hidden state of the sheet. MUST be a value from the following table:
SheetType byte // An unsigned integer that specifies the sheet type. 00=worksheet
Name string // sheet name, decoded with decodeShortXLUnicodeString
}

255
xls/xls.go Normal file
View File

@ -0,0 +1,255 @@
// Package xls implements the Microsoft Excel Binary File Format (.xls) Structure.
// More specifically, it contains just enough detail to extract cell contents,
// data types, and last-calculated formula values. In particular, it does NOT
// implement formatting or formula calculations.
package xls
// https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/cd03cb5f-ca02-4934-a391-bb674cb8aa06
import (
"bytes"
"context"
"encoding/binary"
"errors"
"io"
"log"
"github.com/pbnjay/grate/xls/cfb"
"github.com/pbnjay/grate/xls/crypto"
)
// WorkBook holds the state gathered while loading an .xls file with Open.
type WorkBook struct {
filename string // path that was passed to Open
ctx context.Context // context that was passed to Open
doc cfb.Document // compound document returned by cfb.Open
h *header // most recently decoded BOF header
sheets []*boundSheet // one entry per BoundSheet8 record, in stream order
codepage uint16 // value read from the CodePage record
dateMode uint16 // value read from the Date1904 record
password string
substreams [][]*rec // records grouped by substream; a new group starts at each BOF record
decryptors map[int]crypto.Decryptor // decryptors keyed by the index of the substream containing the FilePass record
}
// Open opens the compound document at filename, locates its "Workbook" stream
// and loads every record from it.
//
// On any failure the returned *WorkBook is nil, so callers never receive a
// partially initialised workbook together with an error.
func Open(ctx context.Context, filename string) (*WorkBook, error) {
	doc, err := cfb.Open(filename)
	if err != nil {
		return nil, err
	}

	rdr, err := doc.Open("Workbook")
	if err != nil {
		return nil, err
	}

	b := &WorkBook{
		filename: filename,
		ctx:      ctx,
		doc:      doc,
	}
	if err := b.loadFromStream(rdr); err != nil {
		return nil, err
	}
	return b, nil
}
// loadFromStream reads every BIFF record from r and processes it in three
// phases:
//
//  1. The records are grouped into substreams; a new substream starts at each
//     BOF record. A FilePass record sets up a decryptor for its substream.
//  2. Substreams that have a decryptor are decrypted. Record headers take part
//     in the cipher stream but are discarded afterwards, and the records (and
//     the BoundSheet8 position field) that are never encrypted are restored
//     from the original data.
//  3. The record types this package understands (BOF, CodePage, Date1904,
//     BoundSheet8, SST) are decoded into the WorkBook.
//
// It returns an error when the stream is malformed or uses an unsupported
// encryption method; reaching the end of the stream is not an error.
func (b *WorkBook) loadFromStream(r io.Reader) error {
	b.decryptors = make(map[int]crypto.Decryptor)
	b.h = &header{}
	substr := -1
	nr, err := b.nextRecord(r)
	for err == nil {
		if nr.RecType == RecTypeBOF {
			substr++
			b.substreams = append(b.substreams, []*rec{})
		}
		if substr < 0 {
			// a record before the first BOF has no substream to belong to
			return errors.New("xls: stream does not begin with a BOF record")
		}
		if nr.RecType == RecTypeFilePass {
			if len(nr.Data) < 2 {
				return errors.New("xls: FilePass record is too short")
			}
			etype := binary.LittleEndian.Uint16(nr.Data)
			switch etype {
			case 1:
				b.decryptors[substr], err = crypto.NewBasicRC4(nr.Data[2:])
				if err != nil {
					log.Println("xls: rc4 encryption failed to set up", err)
					return err
				}
			case 2, 3, 4:
				log.Println("need Crypto API RC4 decryptor")
				return errors.New("xls: unsupported Crypto API encryption method")
			default:
				return errors.New("xls: unsupported encryption method")
			}
		}
		b.substreams[substr] = append(b.substreams[substr], nr)
		nr, err = b.nextRecord(r)
	}
	if err != nil && err != io.EOF {
		return err
	}

	for ss, records := range b.substreams {
		log.Printf("Processing substream %d/%d (%d records)", ss, len(b.substreams), len(records))
		if dec, ok := b.decryptors[ss]; ok {
			log.Printf("Decrypting substream...")
			dec.Reset()
			var head [4]byte
			for _, nr := range records {
				binary.LittleEndian.PutUint16(head[:], uint16(nr.RecType))
				binary.LittleEndian.PutUint16(head[2:], nr.RecSize)
				// send the record for decryption; the header is included so
				// the cipher stream stays aligned with the file contents
				if _, err := dec.Write(head[:]); err != nil {
					return err
				}
				if _, err := dec.Write(nr.Data); err != nil {
					return err
				}
			}
			dec.Flush()

			newrecset := make([]*rec, 0, len(records))
			for _, nr := range records {
				// discard 4 byte header
				if _, err := io.ReadFull(dec, head[:]); err != nil {
					return err
				}
				dr := &rec{
					RecType: nr.RecType,
					RecSize: nr.RecSize,
					Data:    make([]byte, int(nr.RecSize)),
				}
				if _, err := io.ReadFull(dec, dr.Data); err != nil {
					return err
				}
				switch nr.RecType {
				case RecTypeBOF, RecTypeFilePass, RecTypeUsrExcl, RecTypeFileLock, RecTypeInterfaceHdr, RecTypeRRDInfo, RecTypeRRDHead:
					// keep original data
					copy(dr.Data, nr.Data)
				case RecTypeBoundSheet8:
					// copy the position un-decrypted; a record shorter than
					// the 4-byte position is copied as far as it goes
					n := len(nr.Data)
					if n > 4 {
						n = 4
					}
					copy(dr.Data, nr.Data[:n])
				default:
					// apply decryption
				}
				newrecset = append(newrecset, dr)
			}
			b.substreams[ss] = newrecset
			records = newrecset
		}

		for i, nr := range records {
			var bb io.Reader = bytes.NewReader(nr.Data)
			switch nr.RecType {
			case RecTypeSST:
				// gather the SST together with the Continue records that
				// directly follow it
				recSet := []*rec{nr}
				lastIndex := i
				for len(records) > (lastIndex+1) && records[lastIndex+1].RecType == RecTypeContinue {
					lastIndex++
					recSet = append(recSet, records[lastIndex])
				}
				// the decoded strings are not stored yet; only failures
				// are acted upon
				if _, err := parseSST(recSet); err != nil {
					return err
				}
			case RecTypeContinue:
				// no-op (used above)
			case RecTypeEOF:
				log.Println("End Of Stream")
			case RecTypeBOF:
				err = binary.Read(bb, binary.LittleEndian, b.h)
				if err != nil {
					return err
				}
				if b.h.Version != 0x0600 {
					return errors.New("xls: invalid file version")
				}
				if b.h.RupYear != 0x07CC && b.h.RupYear != 0x07CD {
					return errors.New("xls: unsupported biff version")
				}
				if b.h.DocType != 0x0005 && b.h.DocType != 0x0010 {
					// we only support the workbook or worksheet substreams;
					// other document types are reported and otherwise
					// processed like any other substream
					log.Println("xls: unsupported document type")
				}
			case RecTypeCodePage:
				err = binary.Read(bb, binary.LittleEndian, &b.codepage)
				if err != nil {
					return err
				}
			case RecTypeDate1904:
				err = binary.Read(bb, binary.LittleEndian, &b.dateMode)
				if err != nil {
					return err
				}
			case RecTypeBoundSheet8:
				bs := &boundSheet{}
				err = binary.Read(bb, binary.LittleEndian, &bs.Position)
				if err != nil {
					log.Println("fail1", err)
					return err
				}
				err = binary.Read(bb, binary.LittleEndian, &bs.HiddenState)
				if err != nil {
					log.Println("fail1", err)
					return err
				}
				err = binary.Read(bb, binary.LittleEndian, &bs.SheetType)
				if err != nil {
					log.Println("fail1", err)
					return err
				}
				bs.Name, err = decodeShortXLUnicodeString(bb)
				if err != nil {
					log.Println("fail2", err)
					return err
				}
				b.sheets = append(b.sheets, bs)
				log.Println("SHEET", bs.Name)
			default:
				// record type not handled yet
			}
		}
	}
	return nil
}
// errSkipped is a sentinel error for record types that are skipped.
// NOTE(review): it is not referenced by the code visible in this file -
// confirm whether another file uses it.
var errSkipped = errors.New("xls: skipped record type")
// nextRecord reads one BIFF record from r: a little-endian record type, a
// little-endian uint16 payload size and then that many payload bytes.
//
// io.EOF is returned only when the stream ends cleanly before a new record
// starts. If the stream ends after the record type has been read (missing size
// or missing payload) the record is truncated and io.ErrUnexpectedEOF is
// returned, so callers that treat io.EOF as a normal end of stream do not
// silently drop a damaged final record.
func (b *WorkBook) nextRecord(r io.Reader) (*rec, error) {
	var rt recordType
	var rs uint16

	if err := binary.Read(r, binary.LittleEndian, &rt); err != nil {
		return nil, err
	}
	if err := binary.Read(r, binary.LittleEndian, &rs); err != nil {
		if err == io.EOF {
			err = io.ErrUnexpectedEOF
		}
		return nil, err
	}

	data := make([]byte, rs)
	if _, err := io.ReadFull(r, data); err != nil {
		if err == io.EOF {
			err = io.ErrUnexpectedEOF
		}
		return nil, err
	}
	return &rec{RecType: rt, RecSize: rs, Data: data}, nil
}