20
0

refactor: optimize Workbook.get_string

This commit is contained in:
yangfu 2022-08-04 15:39:04 +08:00
parent 4a6cf26307
commit f71e284af7
4 changed files with 127 additions and 19 deletions

86
binary_read.go Normal file
View File

@ -0,0 +1,86 @@
package xls
import (
"encoding/binary"
"io"
)
// ReadBytes reads exactly size bytes from r and returns them in a newly
// allocated slice.
//
// A single call to r.Read is allowed to return fewer bytes than requested
// without reporting an error, so io.ReadFull is used to keep reading until the
// buffer is full. The error is io.EOF when no bytes could be read at all and
// io.ErrUnexpectedEOF when the stream ended after only part of the buffer was
// filled.
//
// The returned slice always has length size, even when an error is returned;
// bytes that could not be read are left as zero. Callers that discard the
// error can therefore still index into the result safely.
func ReadBytes(r io.Reader, size int) ([]byte, error) {
	buf := make([]byte, size)
	if _, err := io.ReadFull(r, buf); err != nil {
		return buf, err
	}
	return buf, nil
}
// MustReadBytes returns the slice produced by ReadBytes(r, size) and discards
// the accompanying error.
//
// Despite the Must prefix it does not panic: a failed read is silently ignored
// and whatever slice ReadBytes handed back is returned as-is.
func MustReadBytes(r io.Reader, size int) []byte {
	data, _ := ReadBytes(r, size) // best-effort: the read error is intentionally dropped
	return data
}
// ReadByte reads one byte from r using ReadBytes.
//
// It returns the byte and a nil error on success; when ReadBytes reports an
// error, it returns 0 together with that error.
func ReadByte(r io.Reader) (byte, error) {
	var value byte
	data, err := ReadBytes(r, 1)
	if err == nil {
		value = data[0]
	}
	return value, err
}
// ReadUint16 reads two bytes from r using ReadBytes and decodes them as a
// little-endian uint16.
//
// When ReadBytes reports an error, it returns 0 together with that error.
func ReadUint16(r io.Reader) (uint16, error) {
	var value uint16
	raw, err := ReadBytes(r, 2)
	if err == nil {
		value = binary.LittleEndian.Uint16(raw)
	}
	return value, err
}
// ReadUint32 reads four bytes from r using ReadBytes and decodes them as a
// little-endian uint32.
//
// When ReadBytes reports an error, it returns 0 together with that error.
func ReadUint32(r io.Reader) (uint32, error) {
	var value uint32
	raw, err := ReadBytes(r, 4)
	if err == nil {
		value = binary.LittleEndian.Uint32(raw)
	}
	return value, err
}
// ReadBoundSheet reads 7 bytes from r and decodes them into a new boundsheet:
//
//	bytes 0-3  Filepos (little-endian uint32)
//	byte  4    Visible
//	byte  5    Type
//	byte  6    Name
//
// Any error from ReadBytes is discarded, so after a failed read the fields are
// decoded from whatever the returned buffer contains.
func ReadBoundSheet(r io.Reader) *boundsheet {
	raw, _ := ReadBytes(r, 7) // read error intentionally ignored

	sheet := &boundsheet{}
	sheet.Filepos = binary.LittleEndian.Uint32(raw[:4])
	sheet.Visible, sheet.Type, sheet.Name = raw[4], raw[5], raw[6]
	return sheet
}
// ReadRowInfo reads 16 bytes from r and decodes them into a new rowInfo. All
// fields are little-endian:
//
//	bytes 0-1    Index    (uint16)
//	bytes 2-3    Fcell    (uint16)
//	bytes 4-5    Lcell    (uint16)
//	bytes 6-7    Height   (uint16)
//	bytes 8-9    Notused  (uint16)
//	bytes 10-11  Notused2 (uint16)
//	bytes 12-15  Flags    (uint32)
//
// Any error from ReadBytes is discarded, so after a failed read the fields are
// decoded from whatever the returned buffer contains.
func ReadRowInfo(r io.Reader) *rowInfo {
	raw, _ := ReadBytes(r, 16) // read error intentionally ignored
	le := binary.LittleEndian

	info := &rowInfo{}
	info.Index = le.Uint16(raw[:2])
	info.Fcell = le.Uint16(raw[2:4])
	info.Lcell = le.Uint16(raw[4:6])
	info.Height = le.Uint16(raw[6:8])
	info.Notused = le.Uint16(raw[8:10])
	info.Notused2 = le.Uint16(raw[10:12])
	info.Flags = le.Uint32(raw[12:16])
	return info
}
// ReadLabelsstCol reads 10 bytes from r and decodes them into a new
// LabelsstCol. All fields are little-endian:
//
//	bytes 0-1  RowB      (uint16)
//	bytes 2-3  FirstColB (uint16)
//	bytes 4-5  Xf        (uint16)
//	bytes 6-9  Sst       (uint32)
//
// Any error from ReadBytes is discarded, so after a failed read the fields are
// decoded from whatever the returned buffer contains.
func ReadLabelsstCol(r io.Reader) *LabelsstCol {
	raw, _ := ReadBytes(r, 10) // read error intentionally ignored
	le := binary.LittleEndian

	cell := &LabelsstCol{}
	cell.RowB = le.Uint16(raw[:2])
	cell.FirstColB = le.Uint16(raw[2:4])
	cell.Xf = le.Uint16(raw[4:6])
	cell.Sst = le.Uint32(raw[6:10])
	return cell
}
// ReadBof reads a 4-byte header from r and stores it in row: bytes 0-1 become
// row.Id and bytes 2-3 become row.Size, both decoded as little-endian uint16.
//
// When ReadBytes reports an error, that error is returned and row is left
// unmodified; otherwise the result is nil.
func ReadBof(r io.Reader, row *bof) error {
	header, err := ReadBytes(r, 4)
	if err != nil {
		return err
	}

	le := binary.LittleEndian
	row.Id, row.Size = le.Uint16(header[:2]), le.Uint16(header[2:4])
	return nil
}

10
go.mod Normal file
View File

@ -0,0 +1,10 @@
module github.com/extrame/xls
go 1.16
require (
github.com/extrame/goyymmdd v0.0.0-20210114090516-7cc815f00d1a
github.com/extrame/ole2 v0.0.0-20160812065207-d69429661ad7
github.com/tealeg/xlsx v1.0.5
golang.org/x/text v0.3.7
)

View File

@ -45,7 +45,8 @@ func (w *WorkBook) Parse(buf io.ReadSeeker) {
// buf := bytes.NewReader(bts) // buf := bytes.NewReader(bts)
offset := 0 offset := 0
for { for {
if err := binary.Read(buf, binary.LittleEndian, b); err == nil { //if err := binary.Read(buf, binary.LittleEndian, b); err == nil {
if err := ReadBof(buf, b); err == nil {
bof_pre, b, offset = w.parseBof(buf, b, bof_pre, offset) bof_pre, b, offset = w.parseBof(buf, b, bof_pre, offset)
} else { } else {
break break
@ -72,8 +73,9 @@ func (w *WorkBook) addFormat(format *Format) {
func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int) (after *bof, after_using *bof, offset int) { func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int) (after *bof, after_using *bof, offset int) {
after = b after = b
after_using = pre after_using = pre
var bts = make([]byte, b.Size) //var bts = make([]byte, b.Size)
binary.Read(buf, binary.LittleEndian, bts) //binary.Read(buf, binary.LittleEndian, bts)
var bts = MustReadBytes(buf, int(b.Size))
buf_item := bytes.NewReader(bts) buf_item := bytes.NewReader(bts)
switch b.Id { switch b.Id {
case 0x809: case 0x809:
@ -135,7 +137,8 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int
offset = i offset = i
case 0x85: // boundsheet case 0x85: // boundsheet
var bs = new(boundsheet) var bs = new(boundsheet)
binary.Read(buf_item, binary.LittleEndian, bs) //binary.Read(buf_item, binary.LittleEndian, bs)
bs = ReadBoundSheet(buf_item)
// different for BIFF5 and BIFF8 // different for BIFF5 and BIFF8
wb.addSheet(bs, buf_item) wb.addSheet(bs, buf_item)
case 0x0e0: // XF case 0x0e0: // XF
@ -177,15 +180,18 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e
var richtext_num = uint16(0) var richtext_num = uint16(0)
var phonetic_size = uint32(0) var phonetic_size = uint32(0)
var flag byte var flag byte
err = binary.Read(buf, binary.LittleEndian, &flag) //err = binary.Read(buf, binary.LittleEndian, &flag)
flag, err = ReadByte(buf)
if flag&0x8 != 0 { if flag&0x8 != 0 {
err = binary.Read(buf, binary.LittleEndian, &richtext_num) //err = binary.Read(buf, binary.LittleEndian, &richtext_num)
richtext_num, err = ReadUint16(buf)
} else if w.continue_rich > 0 { } else if w.continue_rich > 0 {
richtext_num = w.continue_rich richtext_num = w.continue_rich
w.continue_rich = 0 w.continue_rich = 0
} }
if flag&0x4 != 0 { if flag&0x4 != 0 {
err = binary.Read(buf, binary.LittleEndian, &phonetic_size) //err = binary.Read(buf, binary.LittleEndian, &phonetic_size)
phonetic_size, err = ReadUint32(buf)
} else if w.continue_apsb > 0 { } else if w.continue_apsb > 0 {
phonetic_size = w.continue_apsb phonetic_size = w.continue_apsb
w.continue_apsb = 0 w.continue_apsb = 0
@ -194,7 +200,8 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e
var bts = make([]uint16, size) var bts = make([]uint16, size)
var i = uint16(0) var i = uint16(0)
for ; i < size && err == nil; i++ { for ; i < size && err == nil; i++ {
err = binary.Read(buf, binary.LittleEndian, &bts[i]) //err = binary.Read(buf, binary.LittleEndian, &bts[i])
bts[i], err = ReadUint16(buf)
} }
// when eof found, we dont want to append last element // when eof found, we dont want to append last element
@ -226,15 +233,16 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e
res = string(runes) res = string(runes)
} }
if richtext_num > 0 { if richtext_num > 0 {
var bts []byte //var bts []byte
var seek_size int64 var seek_size int64
if w.Is5ver { if w.Is5ver {
seek_size = int64(2 * richtext_num) seek_size = int64(2 * richtext_num)
} else { } else {
seek_size = int64(4 * richtext_num) seek_size = int64(4 * richtext_num)
} }
bts = make([]byte, seek_size) //bts = make([]byte, seek_size)
err = binary.Read(buf, binary.LittleEndian, bts) //err = binary.Read(buf, binary.LittleEndian, bts)
_, err = ReadBytes(buf, int(seek_size))
if err == io.EOF { if err == io.EOF {
w.continue_rich = richtext_num w.continue_rich = richtext_num
} }
@ -242,9 +250,10 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e
// err = binary.Read(buf, binary.LittleEndian, bts) // err = binary.Read(buf, binary.LittleEndian, bts)
} }
if phonetic_size > 0 { if phonetic_size > 0 {
var bts []byte //var bts []byte
bts = make([]byte, phonetic_size) //bts = make([]byte, phonetic_size)
err = binary.Read(buf, binary.LittleEndian, bts) //err = binary.Read(buf, binary.LittleEndian, bts)
_, err = ReadBytes(buf, int(phonetic_size))
if err == io.EOF { if err == io.EOF {
w.continue_apsb = phonetic_size w.continue_apsb = phonetic_size
} }

View File

@ -51,7 +51,8 @@ func (w *WorkSheet) parse(buf io.ReadSeeker) {
var bof_pre *bof var bof_pre *bof
var col_pre interface{} var col_pre interface{}
for { for {
if err := binary.Read(buf, binary.LittleEndian, b); err == nil { //if err := binary.Read(buf, binary.LittleEndian, b); err == nil {
if err := ReadBof(buf, b); err == nil {
bof_pre, col_pre = w.parseBof(buf, b, bof_pre, col_pre) bof_pre, col_pre = w.parseBof(buf, b, bof_pre, col_pre)
if b.Id == 0xa { if b.Id == 0xa {
break break
@ -81,8 +82,9 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, b *bof, pre *bof, col_pre interf
w.rightToLeft = (sheetOptions & 0x40) != 0 w.rightToLeft = (sheetOptions & 0x40) != 0
w.Selected = (sheetOptions & 0x400) != 0 w.Selected = (sheetOptions & 0x400) != 0
case 0x208: //ROW case 0x208: //ROW
r := new(rowInfo) //r := new(rowInfo)
binary.Read(buf, binary.LittleEndian, r) //binary.Read(buf, binary.LittleEndian, r)
r := ReadRowInfo(buf)
w.addRow(r) w.addRow(r)
case 0x0BD: //MULRK case 0x0BD: //MULRK
mc := new(MulrkCol) mc := new(MulrkCol)
@ -129,8 +131,9 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, b *bof, pre *bof, col_pre interf
col = new(RkCol) col = new(RkCol)
binary.Read(buf, binary.LittleEndian, col) binary.Read(buf, binary.LittleEndian, col)
case 0xFD: //LABELSST case 0xFD: //LABELSST
col = new(LabelsstCol) //col = new(LabelsstCol)
binary.Read(buf, binary.LittleEndian, col) //binary.Read(buf, binary.LittleEndian, col)
col = ReadLabelsstCol(buf)
case 0x204: case 0x204:
c := new(labelCol) c := new(labelCol)
binary.Read(buf, binary.LittleEndian, &c.BlankCol) binary.Read(buf, binary.LittleEndian, &c.BlankCol)