From ed99fddcfb5d1daec6008228b53197d33e0c5a9f Mon Sep 17 00:00:00 2001 From: Liu Ming Date: Tue, 25 Aug 2015 14:21:00 +0800 Subject: [PATCH] fix reading of UTF-16 strings split across CONTINUE records --- workbook.go | 56 +++++++++++++++++++++++++++++++++------------------- worksheet.go | 2 +- 2 files changed, 37 insertions(+), 21 deletions(-) diff --git a/workbook.go b/workbook.go index 07ae5d9..d48e605 100644 --- a/workbook.go +++ b/workbook.go @@ -9,16 +9,17 @@ import ( ) type WorkBook struct { - Is5ver bool - Type uint16 - Codepage uint16 - Xfs []st_xf_data - Fonts []Font - Formats map[uint16]*Format - Sheets []*WorkSheet - Author string - rs io.ReadSeeker - sst []string + Is5ver bool + Type uint16 + Codepage uint16 + Xfs []st_xf_data + Fonts []Font + Formats map[uint16]*Format + Sheets []*WorkSheet + Author string + rs io.ReadSeeker + sst []string + continue_utf16 uint16 } func newWookBookFromOle2(rs io.ReadSeeker) *WorkBook { @@ -33,12 +34,12 @@ func newWookBookFromOle2(rs io.ReadSeeker) *WorkBook { func (w *WorkBook) Parse(buf io.ReadSeeker) { bof := new(BOF) - var bof_pre *BOF + bof_pre := new(BOF) // buf := bytes.NewReader(bts) offset := 0 for { if err := binary.Read(buf, binary.LittleEndian, bof); err == nil { - bof_pre, offset = w.parseBof(buf, bof, bof_pre, offset) + bof_pre, bof, offset = w.parseBof(buf, bof, bof_pre, offset) } else { break } @@ -58,8 +59,9 @@ func (w *WorkBook) addFormat(format *Format) { w.Formats[format.Head.Index] = format } -func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *BOF, pre *BOF, offset_pre int) (after *BOF, offset int) { +func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *BOF, pre *BOF, offset_pre int) (after *BOF, after_using *BOF, offset int) { after = b + after_using = pre var bts = make([]byte, b.Size) binary.Read(buf, binary.LittleEndian, bts) buf_item := bytes.NewReader(bts) @@ -74,18 +76,25 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *BOF, pre *BOF, offset_pre int case 0x042: // CODEPAGE binary.Read(buf_item, 
binary.LittleEndian, &wb.Codepage) case 0x3c: // CONTINUE - var bts = make([]byte, b.Size) - binary.Read(buf_item, binary.LittleEndian, bts) - buf_item := bytes.NewReader(bts) if pre.Id == 0xfc { var size uint16 - if err := binary.Read(buf_item, binary.LittleEndian, &size); err == nil { - wb.sst[offset_pre] = wb.get_string(buf_item, size) + var err error + if wb.continue_utf16 > 1 { + size = wb.continue_utf16 + wb.continue_utf16 = 0 + offset_pre-- + } else { + err = binary.Read(buf_item, binary.LittleEndian, &size) + } + for err == nil && offset_pre < len(wb.sst) { + wb.sst[offset_pre] = wb.sst[offset_pre] + wb.get_string(buf_item, size) offset_pre++ + err = binary.Read(buf_item, binary.LittleEndian, &size) } } offset = offset_pre after = pre + after_using = b case 0xfc: // SST info := new(SstInfo) binary.Read(buf_item, binary.LittleEndian, info) @@ -148,9 +157,16 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) string { } if flag&0x1 != 0 { var bts = make([]uint16, size) - binary.Read(buf, binary.LittleEndian, &bts) - runes := utf16.Decode(bts) + var err error + var i = uint16(0) + for ; i < size && err == nil; i++ { + err = binary.Read(buf, binary.LittleEndian, &bts[i]) + } + runes := utf16.Decode(bts[:i]) res = string(runes) + if i < size { + w.continue_utf16 = size - i + 1 + } } else { var bts = make([]byte, size) binary.Read(buf, binary.LittleEndian, &bts) diff --git a/worksheet.go b/worksheet.go index c04b4fa..b7853f5 100644 --- a/worksheet.go +++ b/worksheet.go @@ -143,7 +143,7 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, bof *BOF, pre *BOF) *BOF { case 0xa: log.Println("sheet end") default: - log.Printf("Unknow %X,%d\n", bof.Id, bof.Size) + // log.Printf("Unknow %X,%d\n", bof.Id, bof.Size) buf.Seek(int64(bof.Size), 1) } if col != nil {