2015-03-19 17:39:41 +08:00
|
|
|
package xls
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"encoding/binary"
|
2018-12-04 00:16:44 +03:00
|
|
|
"golang.org/x/text/encoding/charmap"
|
2015-03-19 17:39:41 +08:00
|
|
|
"io"
|
2016-05-28 10:39:46 +08:00
|
|
|
"os"
|
2015-03-19 17:39:41 +08:00
|
|
|
"unicode/utf16"
|
|
|
|
)
|
|
|
|
|
2015-09-30 10:40:01 +08:00
|
|
|
//xls workbook type
|
2015-03-19 17:39:41 +08:00
|
|
|
type WorkBook struct {
|
2015-09-30 11:17:25 +08:00
|
|
|
Is5ver bool
|
|
|
|
Type uint16
|
|
|
|
Codepage uint16
|
|
|
|
Xfs []st_xf_data
|
|
|
|
Fonts []Font
|
|
|
|
Formats map[uint16]*Format
|
|
|
|
//All the sheets from the workbook
|
|
|
|
sheets []*WorkSheet
|
2015-08-25 14:21:00 +08:00
|
|
|
Author string
|
|
|
|
rs io.ReadSeeker
|
|
|
|
sst []string
|
|
|
|
continue_utf16 uint16
|
2016-07-01 21:29:00 +08:00
|
|
|
continue_rich uint16
|
|
|
|
continue_apsb uint32
|
2015-11-25 14:50:29 +01:00
|
|
|
dateMode uint16
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
|
|
|
|
2015-09-30 10:40:01 +08:00
|
|
|
//read workbook from ole2 file
|
|
|
|
func newWorkBookFromOle2(rs io.ReadSeeker) *WorkBook {
|
2015-03-19 17:39:41 +08:00
|
|
|
wb := new(WorkBook)
|
|
|
|
wb.Formats = make(map[uint16]*Format)
|
|
|
|
// wb.bts = bts
|
|
|
|
wb.rs = rs
|
2015-09-30 11:17:25 +08:00
|
|
|
wb.sheets = make([]*WorkSheet, 0)
|
2015-03-19 17:39:41 +08:00
|
|
|
wb.Parse(rs)
|
|
|
|
return wb
|
|
|
|
}
|
|
|
|
|
|
|
|
func (w *WorkBook) Parse(buf io.ReadSeeker) {
|
2015-09-30 10:40:01 +08:00
|
|
|
b := new(bof)
|
|
|
|
bof_pre := new(bof)
|
2015-03-19 17:39:41 +08:00
|
|
|
// buf := bytes.NewReader(bts)
|
|
|
|
offset := 0
|
|
|
|
for {
|
2015-09-30 10:40:01 +08:00
|
|
|
if err := binary.Read(buf, binary.LittleEndian, b); err == nil {
|
|
|
|
bof_pre, b, offset = w.parseBof(buf, b, bof_pre, offset)
|
2015-03-19 17:39:41 +08:00
|
|
|
} else {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (w *WorkBook) addXf(xf st_xf_data) {
|
|
|
|
w.Xfs = append(w.Xfs, xf)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (w *WorkBook) addFont(font *FontInfo, buf io.ReadSeeker) {
|
2016-06-23 10:22:57 +08:00
|
|
|
name, _ := w.get_string(buf, uint16(font.NameB))
|
2015-03-19 17:39:41 +08:00
|
|
|
w.Fonts = append(w.Fonts, Font{Info: font, Name: name})
|
|
|
|
}
|
|
|
|
|
2015-03-25 14:47:26 +08:00
|
|
|
func (w *WorkBook) addFormat(format *Format) {
|
2016-05-28 10:39:46 +08:00
|
|
|
if w.Formats == nil {
|
|
|
|
os.Exit(1)
|
|
|
|
}
|
2015-03-25 14:47:26 +08:00
|
|
|
w.Formats[format.Head.Index] = format
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
|
|
|
|
2015-09-30 10:40:01 +08:00
|
|
|
func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int) (after *bof, after_using *bof, offset int) {
|
2015-03-19 17:39:41 +08:00
|
|
|
after = b
|
2015-08-25 14:21:00 +08:00
|
|
|
after_using = pre
|
2015-03-19 17:39:41 +08:00
|
|
|
var bts = make([]byte, b.Size)
|
|
|
|
binary.Read(buf, binary.LittleEndian, bts)
|
|
|
|
buf_item := bytes.NewReader(bts)
|
|
|
|
switch b.Id {
|
|
|
|
case 0x809:
|
2015-09-30 10:40:01 +08:00
|
|
|
bif := new(biffHeader)
|
2015-03-19 17:39:41 +08:00
|
|
|
binary.Read(buf_item, binary.LittleEndian, bif)
|
|
|
|
if bif.Ver != 0x600 {
|
|
|
|
wb.Is5ver = true
|
|
|
|
}
|
|
|
|
wb.Type = bif.Type
|
|
|
|
case 0x042: // CODEPAGE
|
|
|
|
binary.Read(buf_item, binary.LittleEndian, &wb.Codepage)
|
|
|
|
case 0x3c: // CONTINUE
|
|
|
|
if pre.Id == 0xfc {
|
|
|
|
var size uint16
|
2015-08-25 14:21:00 +08:00
|
|
|
var err error
|
2016-06-23 10:22:57 +08:00
|
|
|
if wb.continue_utf16 >= 1 {
|
2015-08-25 14:21:00 +08:00
|
|
|
size = wb.continue_utf16
|
|
|
|
wb.continue_utf16 = 0
|
|
|
|
} else {
|
|
|
|
err = binary.Read(buf_item, binary.LittleEndian, &size)
|
|
|
|
}
|
|
|
|
for err == nil && offset_pre < len(wb.sst) {
|
2016-06-23 10:22:57 +08:00
|
|
|
var str string
|
2018-12-04 00:16:44 +03:00
|
|
|
str, err = wb.get_string(buf_item, size)
|
|
|
|
wb.sst[offset_pre] = wb.sst[offset_pre] + str
|
2016-07-01 21:29:00 +08:00
|
|
|
|
2017-08-18 11:15:10 +08:00
|
|
|
if err == io.EOF {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
2015-03-19 17:39:41 +08:00
|
|
|
offset_pre++
|
2015-08-25 14:21:00 +08:00
|
|
|
err = binary.Read(buf_item, binary.LittleEndian, &size)
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
offset = offset_pre
|
|
|
|
after = pre
|
2015-08-25 14:21:00 +08:00
|
|
|
after_using = b
|
2015-03-19 17:39:41 +08:00
|
|
|
case 0xfc: // SST
|
|
|
|
info := new(SstInfo)
|
|
|
|
binary.Read(buf_item, binary.LittleEndian, info)
|
|
|
|
wb.sst = make([]string, info.Count)
|
2016-06-23 10:22:57 +08:00
|
|
|
var size uint16
|
|
|
|
var i = 0
|
2018-09-13 22:10:15 +03:00
|
|
|
// dont forget to initialize offset
|
|
|
|
offset = 0
|
2016-06-23 10:22:57 +08:00
|
|
|
for ; i < int(info.Count); i++ {
|
2017-02-21 16:59:50 +08:00
|
|
|
var err error
|
2018-12-04 00:16:44 +03:00
|
|
|
err = binary.Read(buf_item, binary.LittleEndian, &size)
|
|
|
|
if err == nil {
|
2016-06-23 10:22:57 +08:00
|
|
|
var str string
|
|
|
|
str, err = wb.get_string(buf_item, size)
|
|
|
|
wb.sst[i] = wb.sst[i] + str
|
2017-02-21 16:59:50 +08:00
|
|
|
}
|
2016-09-30 11:46:51 +08:00
|
|
|
|
2017-02-21 16:59:50 +08:00
|
|
|
if err == io.EOF {
|
|
|
|
break
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
|
|
|
}
|
2016-06-23 10:22:57 +08:00
|
|
|
offset = i
|
2018-12-04 00:16:44 +03:00
|
|
|
case 0x85: // boundsheet
|
2015-06-16 10:29:58 +08:00
|
|
|
var bs = new(boundsheet)
|
2015-03-19 17:39:41 +08:00
|
|
|
binary.Read(buf_item, binary.LittleEndian, bs)
|
|
|
|
// different for BIFF5 and BIFF8
|
|
|
|
wb.addSheet(bs, buf_item)
|
|
|
|
case 0x0e0: // XF
|
|
|
|
if wb.Is5ver {
|
|
|
|
xf := new(Xf5)
|
|
|
|
binary.Read(buf_item, binary.LittleEndian, xf)
|
|
|
|
wb.addXf(xf)
|
|
|
|
} else {
|
|
|
|
xf := new(Xf8)
|
|
|
|
binary.Read(buf_item, binary.LittleEndian, xf)
|
|
|
|
wb.addXf(xf)
|
|
|
|
}
|
|
|
|
case 0x031: // FONT
|
|
|
|
f := new(FontInfo)
|
|
|
|
binary.Read(buf_item, binary.LittleEndian, f)
|
|
|
|
wb.addFont(f, buf_item)
|
|
|
|
case 0x41E: //FORMAT
|
2016-05-28 10:39:46 +08:00
|
|
|
font := new(Format)
|
|
|
|
binary.Read(buf_item, binary.LittleEndian, &font.Head)
|
2016-06-23 10:22:57 +08:00
|
|
|
font.str, _ = wb.get_string(buf_item, font.Head.Size)
|
2016-05-28 10:39:46 +08:00
|
|
|
wb.addFormat(font)
|
2015-11-25 14:50:29 +01:00
|
|
|
case 0x22: //DATEMODE
|
|
|
|
binary.Read(buf_item, binary.LittleEndian, &wb.dateMode)
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
2017-10-23 11:14:16 +03:00
|
|
|
func decodeWindows1251(enc []byte) string {
|
2018-12-04 00:16:44 +03:00
|
|
|
dec := charmap.Windows1251.NewDecoder()
|
|
|
|
out, _ := dec.Bytes(enc)
|
|
|
|
return string(out)
|
2017-10-23 11:14:16 +03:00
|
|
|
}
|
2016-06-23 10:22:57 +08:00
|
|
|
func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err error) {
|
2015-03-19 17:39:41 +08:00
|
|
|
if w.Is5ver {
|
|
|
|
var bts = make([]byte, size)
|
2016-06-23 10:22:57 +08:00
|
|
|
_, err = buf.Read(bts)
|
2017-10-23 11:14:16 +03:00
|
|
|
res = decodeWindows1251(bts)
|
|
|
|
//res = string(bts)
|
2015-03-19 17:39:41 +08:00
|
|
|
} else {
|
2016-07-01 21:29:00 +08:00
|
|
|
var richtext_num = uint16(0)
|
|
|
|
var phonetic_size = uint32(0)
|
2015-03-19 17:39:41 +08:00
|
|
|
var flag byte
|
2016-06-23 10:22:57 +08:00
|
|
|
err = binary.Read(buf, binary.LittleEndian, &flag)
|
2015-03-19 17:39:41 +08:00
|
|
|
if flag&0x8 != 0 {
|
2016-06-23 10:22:57 +08:00
|
|
|
err = binary.Read(buf, binary.LittleEndian, &richtext_num)
|
2016-07-01 21:29:00 +08:00
|
|
|
} else if w.continue_rich > 0 {
|
|
|
|
richtext_num = w.continue_rich
|
|
|
|
w.continue_rich = 0
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
|
|
|
if flag&0x4 != 0 {
|
2016-06-23 10:22:57 +08:00
|
|
|
err = binary.Read(buf, binary.LittleEndian, &phonetic_size)
|
2016-07-01 21:29:00 +08:00
|
|
|
} else if w.continue_apsb > 0 {
|
|
|
|
phonetic_size = w.continue_apsb
|
|
|
|
w.continue_apsb = 0
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
|
|
|
if flag&0x1 != 0 {
|
|
|
|
var bts = make([]uint16, size)
|
2015-08-25 14:21:00 +08:00
|
|
|
var i = uint16(0)
|
|
|
|
for ; i < size && err == nil; i++ {
|
|
|
|
err = binary.Read(buf, binary.LittleEndian, &bts[i])
|
|
|
|
}
|
2018-08-14 00:05:20 +03:00
|
|
|
|
2018-09-13 22:10:15 +03:00
|
|
|
// when eof found, we dont want to append last element
|
2018-08-14 00:05:20 +03:00
|
|
|
var runes []rune
|
|
|
|
if err == io.EOF {
|
2018-12-04 00:16:44 +03:00
|
|
|
i = i - 1
|
2018-08-14 00:05:20 +03:00
|
|
|
}
|
2018-12-04 00:16:44 +03:00
|
|
|
runes = utf16.Decode(bts[:i])
|
2018-08-14 00:05:20 +03:00
|
|
|
|
2015-03-19 17:39:41 +08:00
|
|
|
res = string(runes)
|
2015-08-25 14:21:00 +08:00
|
|
|
if i < size {
|
2018-12-04 00:16:44 +03:00
|
|
|
w.continue_utf16 = size - i
|
2015-08-25 14:21:00 +08:00
|
|
|
}
|
2018-12-04 00:16:44 +03:00
|
|
|
|
2015-03-19 17:39:41 +08:00
|
|
|
} else {
|
|
|
|
var bts = make([]byte, size)
|
2016-06-23 10:22:57 +08:00
|
|
|
var n int
|
|
|
|
n, err = buf.Read(bts)
|
|
|
|
if uint16(n) < size {
|
|
|
|
w.continue_utf16 = size - uint16(n)
|
|
|
|
err = io.EOF
|
|
|
|
}
|
|
|
|
|
2017-07-26 09:39:26 +08:00
|
|
|
var bts1 = make([]uint16, n)
|
2016-06-23 10:22:57 +08:00
|
|
|
for k, v := range bts[:n] {
|
2016-06-20 09:39:08 +08:00
|
|
|
bts1[k] = uint16(v)
|
|
|
|
}
|
|
|
|
runes := utf16.Decode(bts1)
|
|
|
|
res = string(runes)
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
2016-07-01 21:29:00 +08:00
|
|
|
if richtext_num > 0 {
|
2015-03-19 17:39:41 +08:00
|
|
|
var bts []byte
|
2016-07-01 21:29:00 +08:00
|
|
|
var seek_size int64
|
2015-03-19 17:39:41 +08:00
|
|
|
if w.Is5ver {
|
2016-07-01 21:29:00 +08:00
|
|
|
seek_size = int64(2 * richtext_num)
|
2015-03-19 17:39:41 +08:00
|
|
|
} else {
|
2016-07-01 21:29:00 +08:00
|
|
|
seek_size = int64(4 * richtext_num)
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
2016-07-01 21:29:00 +08:00
|
|
|
bts = make([]byte, seek_size)
|
2016-06-23 10:22:57 +08:00
|
|
|
err = binary.Read(buf, binary.LittleEndian, bts)
|
2016-07-01 21:29:00 +08:00
|
|
|
if err == io.EOF {
|
|
|
|
w.continue_rich = richtext_num
|
|
|
|
}
|
|
|
|
|
|
|
|
// err = binary.Read(buf, binary.LittleEndian, bts)
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
2016-07-01 21:29:00 +08:00
|
|
|
if phonetic_size > 0 {
|
2015-03-19 17:39:41 +08:00
|
|
|
var bts []byte
|
|
|
|
bts = make([]byte, phonetic_size)
|
2016-06-23 10:22:57 +08:00
|
|
|
err = binary.Read(buf, binary.LittleEndian, bts)
|
2016-07-01 21:29:00 +08:00
|
|
|
if err == io.EOF {
|
|
|
|
w.continue_apsb = phonetic_size
|
|
|
|
}
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
|
|
|
}
|
2016-06-23 10:22:57 +08:00
|
|
|
return
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
|
|
|
|
2015-06-16 10:29:58 +08:00
|
|
|
func (w *WorkBook) addSheet(sheet *boundsheet, buf io.ReadSeeker) {
|
2016-06-23 10:22:57 +08:00
|
|
|
name, _ := w.get_string(buf, uint16(sheet.Name))
|
2015-09-30 11:17:25 +08:00
|
|
|
w.sheets = append(w.sheets, &WorkSheet{bs: sheet, Name: name, wb: w})
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
|
|
|
|
2015-06-16 10:37:11 +08:00
|
|
|
//reading a sheet from the compress file to memory, you should call this before you try to get anything from sheet
|
2015-09-30 11:17:25 +08:00
|
|
|
func (w *WorkBook) prepareSheet(sheet *WorkSheet) {
|
2015-03-19 17:39:41 +08:00
|
|
|
w.rs.Seek(int64(sheet.bs.Filepos), 0)
|
2015-06-16 10:37:11 +08:00
|
|
|
sheet.parse(w.rs)
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
|
|
|
|
2016-01-21 13:14:37 +08:00
|
|
|
//Get one sheet by its number
|
2015-09-30 11:17:25 +08:00
|
|
|
func (w *WorkBook) GetSheet(num int) *WorkSheet {
|
|
|
|
if num < len(w.sheets) {
|
|
|
|
s := w.sheets[num]
|
2016-01-21 13:14:37 +08:00
|
|
|
if !s.parsed {
|
|
|
|
w.prepareSheet(s)
|
|
|
|
}
|
2015-09-30 11:17:25 +08:00
|
|
|
return s
|
|
|
|
} else {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-21 13:14:37 +08:00
|
|
|
//Get the number of all sheets, look into example
|
|
|
|
func (w *WorkBook) NumSheets() int {
|
|
|
|
return len(w.sheets)
|
|
|
|
}
|
|
|
|
|
2015-06-16 10:37:11 +08:00
|
|
|
//helper function to read all cells from file
|
2016-05-28 10:39:46 +08:00
|
|
|
//Notice: the max value is the limit of the max capacity of lines.
|
2016-07-01 21:29:00 +08:00
|
|
|
//Warning: the helper function will need big memeory if file is large.
|
2015-03-24 13:06:52 +08:00
|
|
|
func (w *WorkBook) ReadAllCells(max int) (res [][]string) {
|
|
|
|
res = make([][]string, 0)
|
2015-09-30 11:17:25 +08:00
|
|
|
for _, sheet := range w.sheets {
|
2015-03-24 13:06:52 +08:00
|
|
|
if len(res) < max {
|
|
|
|
max = max - len(res)
|
2015-09-30 11:17:25 +08:00
|
|
|
w.prepareSheet(sheet)
|
2015-03-24 13:06:52 +08:00
|
|
|
if sheet.MaxRow != 0 {
|
|
|
|
leng := int(sheet.MaxRow) + 1
|
|
|
|
if max < leng {
|
|
|
|
leng = max
|
|
|
|
}
|
|
|
|
temp := make([][]string, leng)
|
2017-02-21 16:27:01 +08:00
|
|
|
for k, row := range sheet.rows {
|
2015-03-24 13:06:52 +08:00
|
|
|
data := make([]string, 0)
|
2017-02-21 16:27:01 +08:00
|
|
|
if len(row.cols) > 0 {
|
|
|
|
for _, col := range row.cols {
|
2015-03-24 13:06:52 +08:00
|
|
|
if uint16(len(data)) <= col.LastCol() {
|
|
|
|
data = append(data, make([]string, col.LastCol()-uint16(len(data))+1)...)
|
|
|
|
}
|
|
|
|
str := col.String(w)
|
2016-09-30 11:46:51 +08:00
|
|
|
|
2015-03-24 13:06:52 +08:00
|
|
|
for i := uint16(0); i < col.LastCol()-col.FirstCol()+1; i++ {
|
|
|
|
data[col.FirstCol()+i] = str[i]
|
|
|
|
}
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
2015-03-24 13:06:52 +08:00
|
|
|
if leng > int(k) {
|
|
|
|
temp[k] = data
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-03-24 13:06:52 +08:00
|
|
|
res = append(res, temp...)
|
2015-03-19 17:39:41 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|