20
0

refactor: optimize Workbook.get_string

This commit is contained in:
yangfu 2022-08-04 15:39:04 +08:00
parent 4a6cf26307
commit f71e284af7
4 changed files with 127 additions and 19 deletions

86
binary_read.go Normal file
View File

@ -0,0 +1,86 @@
package xls
import (
"encoding/binary"
"io"
)
// ReadBytes reads exactly size bytes from r.
//
// The returned slice always has length size, even when an error is returned,
// so callers that index into it never go out of range; bytes that could not
// be read are left as zero.
//
// A single r.Read call is allowed to return fewer bytes than requested
// without reporting an error, so io.ReadFull is used to keep reading until
// the buffer is full. The error is io.EOF when no bytes at all were
// available, io.ErrUnexpectedEOF when the stream ended part-way through, or
// whatever error the underlying reader reported. A size of zero succeeds
// without touching the reader's data.
func ReadBytes(r io.Reader, size int) ([]byte, error) {
	buf := make([]byte, size)
	if _, err := io.ReadFull(r, buf); err != nil {
		return buf, err
	}
	return buf, nil
}
// MustReadBytes returns the size-byte buffer produced by ReadBytes and
// discards any read error.
//
// Despite the Must prefix it never panics: on failure the caller simply
// receives whatever ReadBytes managed to place in the buffer.
func MustReadBytes(r io.Reader, size int) []byte {
	data, _ := ReadBytes(r, size) // error intentionally ignored (best effort)
	return data
}
// ReadByte reads a single byte from r via ReadBytes.
// On failure it returns 0 together with the error from ReadBytes.
func ReadByte(r io.Reader) (byte, error) {
	var value byte
	raw, err := ReadBytes(r, 1)
	if err == nil {
		value = raw[0]
	}
	return value, err
}
// ReadUint16 reads two bytes from r via ReadBytes and decodes them as a
// little-endian uint16. On failure it returns 0 together with the error
// from ReadBytes.
func ReadUint16(r io.Reader) (uint16, error) {
	var value uint16
	raw, err := ReadBytes(r, 2)
	if err == nil {
		value = binary.LittleEndian.Uint16(raw)
	}
	return value, err
}
// ReadUint32 reads four bytes from r via ReadBytes and decodes them as a
// little-endian uint32. On failure it returns 0 together with the error
// from ReadBytes.
func ReadUint32(r io.Reader) (uint32, error) {
	var value uint32
	raw, err := ReadBytes(r, 4)
	if err == nil {
		value = binary.LittleEndian.Uint32(raw)
	}
	return value, err
}
// ReadBoundSheet reads 7 bytes from r and unpacks them into a new
// boundsheet:
//
//	bytes 0-3  Filepos (little-endian uint32)
//	byte  4    Visible
//	byte  5    Type
//	byte  6    Name
//
// The read error is dropped. ReadBytes always returns a slice of the
// requested length, so the indexing below cannot go out of range; bytes that
// could not be read are left as zero.
func ReadBoundSheet(r io.Reader) *boundsheet {
	raw, _ := ReadBytes(r, 7)

	sheet := new(boundsheet)
	sheet.Filepos = binary.LittleEndian.Uint32(raw[:4])
	sheet.Visible, sheet.Type, sheet.Name = raw[4], raw[5], raw[6]
	return sheet
}
// ReadRowInfo reads 16 bytes from r and unpacks them into a new rowInfo.
// All fields are little-endian: six consecutive uint16 values (Index, Fcell,
// Lcell, Height, Notused, Notused2) followed by a uint32 Flags.
//
// The read error is dropped. ReadBytes always returns a slice of the
// requested length, so the slicing below cannot go out of range; bytes that
// could not be read are left as zero.
func ReadRowInfo(r io.Reader) *rowInfo {
	raw, _ := ReadBytes(r, 16)
	le := binary.LittleEndian

	info := new(rowInfo)
	info.Index = le.Uint16(raw[0:2])
	info.Fcell = le.Uint16(raw[2:4])
	info.Lcell = le.Uint16(raw[4:6])
	info.Height = le.Uint16(raw[6:8])
	info.Notused = le.Uint16(raw[8:10])
	info.Notused2 = le.Uint16(raw[10:12])
	info.Flags = le.Uint32(raw[12:16])
	return info
}
// ReadLabelsstCol reads 10 bytes from r and unpacks them into a new
// LabelsstCol. All fields are little-endian: uint16 RowB, uint16 FirstColB,
// uint16 Xf, then uint32 Sst.
//
// The read error is dropped. ReadBytes always returns a slice of the
// requested length, so the slicing below cannot go out of range; bytes that
// could not be read are left as zero.
func ReadLabelsstCol(r io.Reader) *LabelsstCol {
	raw, _ := ReadBytes(r, 10)
	le := binary.LittleEndian

	cell := new(LabelsstCol)
	cell.RowB = le.Uint16(raw[0:2])
	cell.FirstColB = le.Uint16(raw[2:4])
	cell.Xf = le.Uint16(raw[4:6])
	cell.Sst = le.Uint32(raw[6:10])
	return cell
}
// ReadBof reads a 4-byte record header from r into row: a little-endian
// uint16 Id followed by a little-endian uint16 Size.
//
// If the read fails the error from ReadBytes is returned and row is left
// untouched; otherwise both fields are overwritten and nil is returned.
func ReadBof(r io.Reader, row *bof) error {
	header, err := ReadBytes(r, 4)
	if err != nil {
		return err
	}
	le := binary.LittleEndian
	row.Id, row.Size = le.Uint16(header[:2]), le.Uint16(header[2:])
	return nil
}

10
go.mod Normal file
View File

@ -0,0 +1,10 @@
module github.com/extrame/xls
go 1.16
require (
github.com/extrame/goyymmdd v0.0.0-20210114090516-7cc815f00d1a
github.com/extrame/ole2 v0.0.0-20160812065207-d69429661ad7
github.com/tealeg/xlsx v1.0.5
golang.org/x/text v0.3.7
)

View File

@ -45,7 +45,8 @@ func (w *WorkBook) Parse(buf io.ReadSeeker) {
// buf := bytes.NewReader(bts)
offset := 0
for {
if err := binary.Read(buf, binary.LittleEndian, b); err == nil {
//if err := binary.Read(buf, binary.LittleEndian, b); err == nil {
if err := ReadBof(buf, b); err == nil {
bof_pre, b, offset = w.parseBof(buf, b, bof_pre, offset)
} else {
break
@ -72,8 +73,9 @@ func (w *WorkBook) addFormat(format *Format) {
func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int) (after *bof, after_using *bof, offset int) {
after = b
after_using = pre
var bts = make([]byte, b.Size)
binary.Read(buf, binary.LittleEndian, bts)
//var bts = make([]byte, b.Size)
//binary.Read(buf, binary.LittleEndian, bts)
var bts = MustReadBytes(buf, int(b.Size))
buf_item := bytes.NewReader(bts)
switch b.Id {
case 0x809:
@ -135,7 +137,8 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int
offset = i
case 0x85: // boundsheet
var bs = new(boundsheet)
binary.Read(buf_item, binary.LittleEndian, bs)
//binary.Read(buf_item, binary.LittleEndian, bs)
bs = ReadBoundSheet(buf_item)
// different for BIFF5 and BIFF8
wb.addSheet(bs, buf_item)
case 0x0e0: // XF
@ -177,15 +180,18 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e
var richtext_num = uint16(0)
var phonetic_size = uint32(0)
var flag byte
err = binary.Read(buf, binary.LittleEndian, &flag)
//err = binary.Read(buf, binary.LittleEndian, &flag)
flag, err = ReadByte(buf)
if flag&0x8 != 0 {
err = binary.Read(buf, binary.LittleEndian, &richtext_num)
//err = binary.Read(buf, binary.LittleEndian, &richtext_num)
richtext_num, err = ReadUint16(buf)
} else if w.continue_rich > 0 {
richtext_num = w.continue_rich
w.continue_rich = 0
}
if flag&0x4 != 0 {
err = binary.Read(buf, binary.LittleEndian, &phonetic_size)
//err = binary.Read(buf, binary.LittleEndian, &phonetic_size)
phonetic_size, err = ReadUint32(buf)
} else if w.continue_apsb > 0 {
phonetic_size = w.continue_apsb
w.continue_apsb = 0
@ -194,7 +200,8 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e
var bts = make([]uint16, size)
var i = uint16(0)
for ; i < size && err == nil; i++ {
err = binary.Read(buf, binary.LittleEndian, &bts[i])
//err = binary.Read(buf, binary.LittleEndian, &bts[i])
bts[i], err = ReadUint16(buf)
}
// when eof found, we dont want to append last element
@ -226,15 +233,16 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e
res = string(runes)
}
if richtext_num > 0 {
var bts []byte
//var bts []byte
var seek_size int64
if w.Is5ver {
seek_size = int64(2 * richtext_num)
} else {
seek_size = int64(4 * richtext_num)
}
bts = make([]byte, seek_size)
err = binary.Read(buf, binary.LittleEndian, bts)
//bts = make([]byte, seek_size)
//err = binary.Read(buf, binary.LittleEndian, bts)
_, err = ReadBytes(buf, int(seek_size))
if err == io.EOF {
w.continue_rich = richtext_num
}
@ -242,9 +250,10 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e
// err = binary.Read(buf, binary.LittleEndian, bts)
}
if phonetic_size > 0 {
var bts []byte
bts = make([]byte, phonetic_size)
err = binary.Read(buf, binary.LittleEndian, bts)
//var bts []byte
//bts = make([]byte, phonetic_size)
//err = binary.Read(buf, binary.LittleEndian, bts)
_, err = ReadBytes(buf, int(phonetic_size))
if err == io.EOF {
w.continue_apsb = phonetic_size
}

View File

@ -51,7 +51,8 @@ func (w *WorkSheet) parse(buf io.ReadSeeker) {
var bof_pre *bof
var col_pre interface{}
for {
if err := binary.Read(buf, binary.LittleEndian, b); err == nil {
//if err := binary.Read(buf, binary.LittleEndian, b); err == nil {
if err := ReadBof(buf, b); err == nil {
bof_pre, col_pre = w.parseBof(buf, b, bof_pre, col_pre)
if b.Id == 0xa {
break
@ -81,8 +82,9 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, b *bof, pre *bof, col_pre interf
w.rightToLeft = (sheetOptions & 0x40) != 0
w.Selected = (sheetOptions & 0x400) != 0
case 0x208: //ROW
r := new(rowInfo)
binary.Read(buf, binary.LittleEndian, r)
//r := new(rowInfo)
//binary.Read(buf, binary.LittleEndian, r)
r := ReadRowInfo(buf)
w.addRow(r)
case 0x0BD: //MULRK
mc := new(MulrkCol)
@ -129,8 +131,9 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, b *bof, pre *bof, col_pre interf
col = new(RkCol)
binary.Read(buf, binary.LittleEndian, col)
case 0xFD: //LABELSST
col = new(LabelsstCol)
binary.Read(buf, binary.LittleEndian, col)
//col = new(LabelsstCol)
//binary.Read(buf, binary.LittleEndian, col)
col = ReadLabelsstCol(buf)
case 0x204:
c := new(labelCol)
binary.Read(buf, binary.LittleEndian, &c.BlankCol)