20
0

Fix a bug in reading UTF-16 strings that continue into a CONTINUE record

This commit is contained in:
Liu Ming 2015-08-25 14:21:00 +08:00
parent 708f1d8f15
commit ed99fddcfb
2 changed files with 37 additions and 21 deletions

View File

@ -19,6 +19,7 @@ type WorkBook struct {
Author string Author string
rs io.ReadSeeker rs io.ReadSeeker
sst []string sst []string
continue_utf16 uint16
} }
func newWookBookFromOle2(rs io.ReadSeeker) *WorkBook { func newWookBookFromOle2(rs io.ReadSeeker) *WorkBook {
@ -33,12 +34,12 @@ func newWookBookFromOle2(rs io.ReadSeeker) *WorkBook {
func (w *WorkBook) Parse(buf io.ReadSeeker) { func (w *WorkBook) Parse(buf io.ReadSeeker) {
bof := new(BOF) bof := new(BOF)
var bof_pre *BOF bof_pre := new(BOF)
// buf := bytes.NewReader(bts) // buf := bytes.NewReader(bts)
offset := 0 offset := 0
for { for {
if err := binary.Read(buf, binary.LittleEndian, bof); err == nil { if err := binary.Read(buf, binary.LittleEndian, bof); err == nil {
bof_pre, offset = w.parseBof(buf, bof, bof_pre, offset) bof_pre, bof, offset = w.parseBof(buf, bof, bof_pre, offset)
} else { } else {
break break
} }
@ -58,8 +59,9 @@ func (w *WorkBook) addFormat(format *Format) {
w.Formats[format.Head.Index] = format w.Formats[format.Head.Index] = format
} }
func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *BOF, pre *BOF, offset_pre int) (after *BOF, offset int) { func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *BOF, pre *BOF, offset_pre int) (after *BOF, after_using *BOF, offset int) {
after = b after = b
after_using = pre
var bts = make([]byte, b.Size) var bts = make([]byte, b.Size)
binary.Read(buf, binary.LittleEndian, bts) binary.Read(buf, binary.LittleEndian, bts)
buf_item := bytes.NewReader(bts) buf_item := bytes.NewReader(bts)
@ -74,18 +76,25 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *BOF, pre *BOF, offset_pre int
case 0x042: // CODEPAGE case 0x042: // CODEPAGE
binary.Read(buf_item, binary.LittleEndian, &wb.Codepage) binary.Read(buf_item, binary.LittleEndian, &wb.Codepage)
case 0x3c: // CONTINUE case 0x3c: // CONTINUE
var bts = make([]byte, b.Size)
binary.Read(buf_item, binary.LittleEndian, bts)
buf_item := bytes.NewReader(bts)
if pre.Id == 0xfc { if pre.Id == 0xfc {
var size uint16 var size uint16
if err := binary.Read(buf_item, binary.LittleEndian, &size); err == nil { var err error
wb.sst[offset_pre] = wb.get_string(buf_item, size) if wb.continue_utf16 > 1 {
size = wb.continue_utf16
wb.continue_utf16 = 0
offset_pre--
} else {
err = binary.Read(buf_item, binary.LittleEndian, &size)
}
for err == nil && offset_pre < len(wb.sst) {
wb.sst[offset_pre] = wb.sst[offset_pre] + wb.get_string(buf_item, size)
offset_pre++ offset_pre++
err = binary.Read(buf_item, binary.LittleEndian, &size)
} }
} }
offset = offset_pre offset = offset_pre
after = pre after = pre
after_using = b
case 0xfc: // SST case 0xfc: // SST
info := new(SstInfo) info := new(SstInfo)
binary.Read(buf_item, binary.LittleEndian, info) binary.Read(buf_item, binary.LittleEndian, info)
@ -148,9 +157,16 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) string {
} }
if flag&0x1 != 0 { if flag&0x1 != 0 {
var bts = make([]uint16, size) var bts = make([]uint16, size)
binary.Read(buf, binary.LittleEndian, &bts) var err error
runes := utf16.Decode(bts) var i = uint16(0)
for ; i < size && err == nil; i++ {
err = binary.Read(buf, binary.LittleEndian, &bts[i])
}
runes := utf16.Decode(bts[:i])
res = string(runes) res = string(runes)
if i < size {
w.continue_utf16 = size - i + 1
}
} else { } else {
var bts = make([]byte, size) var bts = make([]byte, size)
binary.Read(buf, binary.LittleEndian, &bts) binary.Read(buf, binary.LittleEndian, &bts)

View File

@ -143,7 +143,7 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, bof *BOF, pre *BOF) *BOF {
case 0xa: case 0xa:
log.Println("sheet end") log.Println("sheet end")
default: default:
log.Printf("Unknow %X,%d\n", bof.Id, bof.Size) // log.Printf("Unknow %X,%d\n", bof.Id, bof.Size)
buf.Seek(int64(bof.Size), 1) buf.Seek(int64(bof.Size), 1)
} }
if col != nil { if col != nil {