21
0
xls/workbook.go

345 lines
8.1 KiB
Go
Raw Normal View History

2015-03-19 10:39:41 +01:00
package xls
import (
"bytes"
"encoding/binary"
"io"
2018-04-05 15:28:01 +02:00
"strings"
2015-03-19 10:39:41 +01:00
"unicode/utf16"
)
2015-09-30 04:40:01 +02:00
//xls workbook type
2015-03-19 10:39:41 +01:00
type WorkBook struct {
2018-04-05 15:28:01 +02:00
Is5ver bool
Type uint16
Codepage uint16
Xfs []XF
Fonts []Font
Formats map[uint16]*Format
2015-09-30 05:17:25 +02:00
sheets []*WorkSheet
Author string
rs io.ReadSeeker
sst []string
2018-04-05 15:28:01 +02:00
ref *extSheetRef
continue_utf16 uint16
continue_rich uint16
continue_apsb uint32
dateMode uint16
2015-03-19 10:39:41 +01:00
}
2015-09-30 04:40:01 +02:00
//read workbook from ole2 file
func newWorkBookFromOle2(rs io.ReadSeeker) *WorkBook {
2018-04-05 15:28:01 +02:00
var wb = &WorkBook{
rs: rs,
ref: new(extSheetRef),
sheets: make([]*WorkSheet, 0),
Formats: make(map[uint16]*Format),
}
wb.parse(rs)
wb.prepare()
2015-03-19 10:39:41 +01:00
return wb
}
2018-04-05 15:28:01 +02:00
func (w *WorkBook) parse(buf io.ReadSeeker) {
2015-09-30 04:40:01 +02:00
b := new(bof)
2018-04-05 15:28:01 +02:00
bp := new(bof)
2015-03-19 10:39:41 +01:00
offset := 0
2018-04-05 15:28:01 +02:00
2015-03-19 10:39:41 +01:00
for {
2015-09-30 04:40:01 +02:00
if err := binary.Read(buf, binary.LittleEndian, b); err == nil {
2018-04-05 15:28:01 +02:00
bp, b, offset = w.parseBof(buf, b, bp, offset)
2015-03-19 10:39:41 +01:00
} else {
break
}
}
}
2015-09-30 04:40:01 +02:00
func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int) (after *bof, after_using *bof, offset int) {
2015-03-19 10:39:41 +01:00
after = b
after_using = pre
2015-03-19 10:39:41 +01:00
var bts = make([]byte, b.Size)
binary.Read(buf, binary.LittleEndian, bts)
2018-04-05 15:28:01 +02:00
item := bytes.NewReader(bts)
2015-03-19 10:39:41 +01:00
switch b.Id {
2018-04-05 15:28:01 +02:00
case 0x0809: // BOF
2015-09-30 04:40:01 +02:00
bif := new(biffHeader)
2018-04-05 15:28:01 +02:00
binary.Read(item, binary.LittleEndian, bif)
2015-03-19 10:39:41 +01:00
if bif.Ver != 0x600 {
wb.Is5ver = true
}
wb.Type = bif.Type
2018-04-05 15:28:01 +02:00
case 0x0042: // CODEPAGE
binary.Read(item, binary.LittleEndian, &wb.Codepage)
case 0x3C: // CONTINUE
2015-03-19 10:39:41 +01:00
if pre.Id == 0xfc {
var size uint16
var err error
2016-06-23 04:22:57 +02:00
if wb.continue_utf16 >= 1 {
size = wb.continue_utf16
wb.continue_utf16 = 0
} else {
2018-04-05 15:28:01 +02:00
err = binary.Read(item, binary.LittleEndian, &size)
}
for err == nil && offset_pre < len(wb.sst) {
2016-06-23 04:22:57 +02:00
var str string
if size > 0 {
2018-04-05 15:28:01 +02:00
str, err = wb.parseString(item, size)
wb.sst[offset_pre] = wb.sst[offset_pre] + str
}
2017-08-18 05:15:10 +02:00
if err == io.EOF {
break
}
2015-03-19 10:39:41 +01:00
offset_pre++
2018-04-05 15:28:01 +02:00
err = binary.Read(item, binary.LittleEndian, &size)
2015-03-19 10:39:41 +01:00
}
}
offset = offset_pre
after = pre
after_using = b
2018-04-05 15:28:01 +02:00
case 0x00FC: // SST
2015-03-19 10:39:41 +01:00
info := new(SstInfo)
2018-04-05 15:28:01 +02:00
binary.Read(item, binary.LittleEndian, info)
2015-03-19 10:39:41 +01:00
wb.sst = make([]string, info.Count)
2016-06-23 04:22:57 +02:00
var size uint16
var i = 0
for ; i < int(info.Count); i++ {
2017-02-21 09:59:50 +01:00
var err error
2018-04-05 15:28:01 +02:00
if err = binary.Read(item, binary.LittleEndian, &size); err == nil {
2016-06-23 04:22:57 +02:00
var str string
2018-04-05 15:28:01 +02:00
str, err = wb.parseString(item, size)
2016-06-23 04:22:57 +02:00
wb.sst[i] = wb.sst[i] + str
2017-02-21 09:59:50 +01:00
}
2016-09-30 05:46:51 +02:00
2017-02-21 09:59:50 +01:00
if err == io.EOF {
break
2015-03-19 10:39:41 +01:00
}
}
2016-06-23 04:22:57 +02:00
offset = i
2018-04-05 15:28:01 +02:00
case 0x0085: // SHEET
var bs = new(boundsheet)
2018-04-05 15:28:01 +02:00
binary.Read(item, binary.LittleEndian, bs)
2015-03-19 10:39:41 +01:00
// different for BIFF5 and BIFF8
2018-04-05 15:28:01 +02:00
wb.addSheet(bs, item)
case 0x0017: // EXTERNSHEET
if !wb.Is5ver {
binary.Read(item, binary.LittleEndian, &wb.ref.Num)
wb.ref.Info = make([]ExtSheetInfo, wb.ref.Num)
binary.Read(item, binary.LittleEndian, &wb.ref.Info)
}
case 0x00e0: // XF
2015-03-19 10:39:41 +01:00
if wb.Is5ver {
xf := new(Xf5)
2018-04-05 15:28:01 +02:00
binary.Read(item, binary.LittleEndian, xf)
2015-03-19 10:39:41 +01:00
wb.addXf(xf)
} else {
xf := new(Xf8)
2018-04-05 15:28:01 +02:00
binary.Read(item, binary.LittleEndian, xf)
2015-03-19 10:39:41 +01:00
wb.addXf(xf)
}
2018-04-05 15:28:01 +02:00
case 0x0031: // FONT
2015-03-19 10:39:41 +01:00
f := new(FontInfo)
2018-04-05 15:28:01 +02:00
binary.Read(item, binary.LittleEndian, f)
wb.addFont(f, item)
case 0x041E: //FORMAT
format := new(Format)
binary.Read(item, binary.LittleEndian, &format.Head)
if raw, err := wb.parseString(item, format.Head.Size); nil == err && "" != raw {
format.Raw = strings.Split(raw, ";")
} else {
format.Raw = []string{}
}
wb.addFormat(format)
case 0x0022: //DATEMODE
binary.Read(item, binary.LittleEndian, &wb.dateMode)
2015-03-19 10:39:41 +01:00
}
return
}
2018-04-05 15:28:01 +02:00
// addXf appends xf to the workbook's list of XF records.
func (w *WorkBook) addXf(xf XF) {
	w.Xfs = append(w.Xfs, xf)
}
// addFont reads the font name from buf, using the length stored in
// font.NameB, and appends the font to the workbook.
func (w *WorkBook) addFont(font *FontInfo, buf io.ReadSeeker) {
	// Best effort: a decoding error is dropped and whatever name was read is kept.
	fontName, _ := w.parseString(buf, uint16(font.NameB))
	entry := Font{Info: font, Name: fontName}
	w.Fonts = append(w.Fonts, entry)
}
// addFormat stores format under its own index, replacing any format already
// registered for that index.
func (w *WorkBook) addFormat(format *Format) {
	idx := format.Head.Index
	w.Formats[idx] = format
}
// addSheet reads the sheet name from buf, using the length stored in
// sheet.Name, and appends a new WorkSheet whose id is its position in the
// workbook.
func (w *WorkBook) addSheet(sheet *boundsheet, buf io.ReadSeeker) {
	// Best effort: a decoding error is dropped and whatever name was read is kept.
	title, _ := w.parseString(buf, uint16(sheet.Name))
	ws := &WorkSheet{
		id:   len(w.sheets),
		bs:   sheet,
		Name: title,
		wb:   w,
	}
	w.sheets = append(w.sheets, ws)
}
// prepare finishes the format table: every entry of builtInNumFmt whose index
// the file did not define is added, then Prepare is called on each format.
func (w *WorkBook) prepare() {
	for idx, pattern := range builtInNumFmt {
		if _, defined := w.Formats[idx]; defined {
			continue
		}
		w.Formats[idx] = &Format{
			Raw: strings.Split(pattern, ";"),
		}
	}
	for _, f := range w.Formats {
		f.Prepare()
	}
}
// prepareSheet loads a sheet into memory: it moves the workbook stream to the
// offset recorded for the sheet and parses the sheet from there. Call it
// before reading anything from the sheet.
//
// If the stream cannot be positioned the sheet is left unparsed, because
// parsing from an unknown position would decode unrelated bytes as sheet
// records.
func (w *WorkBook) prepareSheet(sheet *WorkSheet) {
	if _, err := w.rs.Seek(int64(sheet.bs.Filepos), io.SeekStart); err != nil {
		return
	}
	sheet.parse(w.rs)
}
func (w *WorkBook) parseString(buf io.ReadSeeker, size uint16) (res string, err error) {
2015-03-19 10:39:41 +01:00
if w.Is5ver {
var bts = make([]byte, size)
2016-06-23 04:22:57 +02:00
_, err = buf.Read(bts)
2018-04-05 15:28:01 +02:00
res = string(bytes.Trim(bts, "\r\n\t "))
2015-03-19 10:39:41 +01:00
} else {
var richtext_num = uint16(0)
var phonetic_size = uint32(0)
2015-03-19 10:39:41 +01:00
var flag byte
2016-06-23 04:22:57 +02:00
err = binary.Read(buf, binary.LittleEndian, &flag)
2015-03-19 10:39:41 +01:00
if flag&0x8 != 0 {
2016-06-23 04:22:57 +02:00
err = binary.Read(buf, binary.LittleEndian, &richtext_num)
} else if w.continue_rich > 0 {
richtext_num = w.continue_rich
w.continue_rich = 0
2015-03-19 10:39:41 +01:00
}
if flag&0x4 != 0 {
2016-06-23 04:22:57 +02:00
err = binary.Read(buf, binary.LittleEndian, &phonetic_size)
} else if w.continue_apsb > 0 {
phonetic_size = w.continue_apsb
w.continue_apsb = 0
2015-03-19 10:39:41 +01:00
}
if flag&0x1 != 0 {
var bts = make([]uint16, size)
var i = uint16(0)
for ; i < size && err == nil; i++ {
err = binary.Read(buf, binary.LittleEndian, &bts[i])
}
runes := utf16.Decode(bts[:i])
2018-04-05 15:28:01 +02:00
res = strings.Trim(string(runes), "\r\n\t ")
if i < size {
w.continue_utf16 = size - i + 1
}
2015-03-19 10:39:41 +01:00
} else {
var bts = make([]byte, size)
2016-06-23 04:22:57 +02:00
var n int
n, err = buf.Read(bts)
if uint16(n) < size {
w.continue_utf16 = size - uint16(n)
err = io.EOF
}
2017-07-26 03:39:26 +02:00
var bts1 = make([]uint16, n)
2016-06-23 04:22:57 +02:00
for k, v := range bts[:n] {
2016-06-20 03:39:08 +02:00
bts1[k] = uint16(v)
}
runes := utf16.Decode(bts1)
2018-04-05 15:28:01 +02:00
res = strings.Trim(string(runes), "\r\n\t ")
2015-03-19 10:39:41 +01:00
}
if richtext_num > 0 {
2015-03-19 10:39:41 +01:00
var bts []byte
2018-04-05 15:28:01 +02:00
var ss int64
2015-03-19 10:39:41 +01:00
if w.Is5ver {
2018-04-05 15:28:01 +02:00
ss = int64(2 * richtext_num)
2015-03-19 10:39:41 +01:00
} else {
2018-04-05 15:28:01 +02:00
ss = int64(4 * richtext_num)
2015-03-19 10:39:41 +01:00
}
2018-04-05 15:28:01 +02:00
bts = make([]byte, ss)
2016-06-23 04:22:57 +02:00
err = binary.Read(buf, binary.LittleEndian, bts)
if err == io.EOF {
w.continue_rich = richtext_num
}
2015-03-19 10:39:41 +01:00
}
if phonetic_size > 0 {
2015-03-19 10:39:41 +01:00
var bts []byte
bts = make([]byte, phonetic_size)
2016-06-23 04:22:57 +02:00
err = binary.Read(buf, binary.LittleEndian, bts)
if err == io.EOF {
w.continue_apsb = phonetic_size
}
2015-03-19 10:39:41 +01:00
}
}
2016-06-23 04:22:57 +02:00
return
2015-03-19 10:39:41 +01:00
}
2018-04-05 15:28:01 +02:00
// Format format value to string
func (w *WorkBook) Format(xf uint16, v float64) (string, bool) {
var val string
var idx = int(xf)
if len(w.Xfs) > idx {
if formatter := w.Formats[w.Xfs[idx].FormatNo()]; nil != formatter {
return formatter.String(v), true
}
}
2015-03-19 10:39:41 +01:00
2018-04-05 15:28:01 +02:00
return val, false
2015-03-19 10:39:41 +01:00
}
2018-04-05 15:28:01 +02:00
//GetSheet get one sheet by its number
2015-09-30 05:17:25 +02:00
func (w *WorkBook) GetSheet(num int) *WorkSheet {
if num < len(w.sheets) {
s := w.sheets[num]
if !s.parsed {
w.prepareSheet(s)
}
2015-09-30 05:17:25 +02:00
return s
}
2018-04-05 15:28:01 +02:00
return nil
2015-09-30 05:17:25 +02:00
}
// NumSheets returns the number of sheets in the workbook.
func (w *WorkBook) NumSheets() int {
	count := len(w.sheets)
	return count
}
2018-04-05 15:28:01 +02:00
//ReadAllCells helper function to read all cells from file
2016-05-28 04:39:46 +02:00
//Notice: the max value is the limit of the max capacity of lines.
2018-04-05 15:28:01 +02:00
//Warning: the helper function will need big memory if file is large.
2015-03-24 06:06:52 +01:00
func (w *WorkBook) ReadAllCells(max int) (res [][]string) {
res = make([][]string, 0)
2015-09-30 05:17:25 +02:00
for _, sheet := range w.sheets {
2015-03-24 06:06:52 +01:00
if len(res) < max {
max = max - len(res)
2015-09-30 05:17:25 +02:00
w.prepareSheet(sheet)
2015-03-24 06:06:52 +01:00
if sheet.MaxRow != 0 {
2018-04-05 15:28:01 +02:00
length := int(sheet.MaxRow) + 1
if max < length {
length = max
2015-03-24 06:06:52 +01:00
}
2018-04-05 15:28:01 +02:00
temp := make([][]string, length)
2017-02-21 09:27:01 +01:00
for k, row := range sheet.rows {
2015-03-24 06:06:52 +01:00
data := make([]string, 0)
2017-02-21 09:27:01 +01:00
if len(row.cols) > 0 {
for _, col := range row.cols {
2015-03-24 06:06:52 +01:00
if uint16(len(data)) <= col.LastCol() {
data = append(data, make([]string, col.LastCol()-uint16(len(data))+1)...)
}
str := col.String(w)
2016-09-30 05:46:51 +02:00
2015-03-24 06:06:52 +01:00
for i := uint16(0); i < col.LastCol()-col.FirstCol()+1; i++ {
data[col.FirstCol()+i] = str[i]
}
2015-03-19 10:39:41 +01:00
}
2018-04-05 15:28:01 +02:00
if length > int(k) {
2015-03-24 06:06:52 +01:00
temp[k] = data
2015-03-19 10:39:41 +01:00
}
}
}
2015-03-24 06:06:52 +01:00
res = append(res, temp...)
2015-03-19 10:39:41 +01:00
}
}
}
return
}