commit fbfa8b5d0d867103a17eba9621bf99fa9cdec0a9 Author: Liu Ming Date: Thu Mar 19 17:39:41 2015 +0800 init diff --git a/bof.go b/bof.go new file mode 100644 index 0000000..d0483d7 --- /dev/null +++ b/bof.go @@ -0,0 +1,30 @@ +package xls + +import () + +type BOF struct { + Id uint16 + Size uint16 +} + +type BIFFHeader struct { + Ver uint16 + Type uint16 + Id_make uint16 + Year uint16 + Flags uint32 + Min_ver uint32 +} + +// func parseBofsForWb(bts []byte, wb *WorkBook) { +// bof := new(BOF) +// var bof_pre *BOF +// buf := bytes.NewReader(bts) +// for { +// if err := binary.Read(buf, binary.LittleEndian, bof); err == nil { +// bof_pre = bof.ActForWb(buf, wb, bof_pre) +// } else { +// break +// } +// } +// } diff --git a/col.go b/col.go new file mode 100644 index 0000000..d38a2f0 --- /dev/null +++ b/col.go @@ -0,0 +1,126 @@ +package xls + +import ( + "fmt" + "math" +) + +type Col struct { + RowB uint16 + FirstColB uint16 +} + +type Coler interface { + String(*WorkBook) []string + Row() uint16 + FirstCol() uint16 + LastCol() uint16 +} + +func (c *Col) Row() uint16 { + return c.RowB +} + +func (c *Col) FirstCol() uint16 { + return c.FirstColB +} + +func (c *Col) LastCol() uint16 { + return c.FirstColB +} + +func (c *Col) String(wb *WorkBook) []string { + return []string{""} +} + +type XfRk struct { + Index uint16 + Rk RK +} + +type RK uint32 + +func (rk RK) String() string { + multiplied := rk & 1 + isInt := rk & 2 + val := rk >> 2 + if isInt == 0 { + f := math.Float64frombits(uint64(val) << 34) + if multiplied != 0 { + f = f / 100 + } + return fmt.Sprintf("%.1f", f) + } else { + return fmt.Sprint(val) + } +} + +type MulrkCol struct { + Col + Xfrks []XfRk + LastColB uint16 +} + +func (c *MulrkCol) LastCol() uint16 { + return c.LastColB +} + +func (c *MulrkCol) String(wb *WorkBook) []string { + var res = make([]string, len(c.Xfrks)) + for i := 0; i < len(c.Xfrks); i++ { + xfrk := c.Xfrks[i] + res[i] = xfrk.Rk.String() + } + return res +} + +type MulBlankCol struct { + Col + Xfs []uint16 + LastColB uint16 +} + +func (c *MulBlankCol) LastCol() uint16 { + return c.LastColB +} + +func (c *MulBlankCol) String(wb *WorkBook) []string { + return make([]string, len(c.Xfs)) +} + +type NumberCol struct { + Col + Index uint16 + Float float64 +} + +func (c *NumberCol) String(wb *WorkBook) []string { + return []string{fmt.Sprintf("%f", c.Float)} +} + +type FormulaCol struct { + Col +} +type RkCol struct { + Col + Xfrk XfRk +} + +func (c *RkCol) String(wb *WorkBook) []string { + return []string{c.Xfrk.Rk.String()} +} + +type LabelsstCol struct { + Col + Xf uint16 + Sst uint32 +} + +func (c *LabelsstCol) String(wb *WorkBook) []string { + return []string{wb.sst[int(c.Sst)]} +} + +type BlankCol struct { + Col + Xf uint16 +} diff --git a/font.go b/font.go new file mode 100644 index 0000000..a85df50 --- /dev/null +++ b/font.go @@ -0,0 +1,19 @@ +package xls + +type FontInfo struct { + Height uint16 + Flag uint16 + Color uint16 + Bold uint16 + Escapement uint16 + Underline byte + Family byte + Charset byte + Notused byte + NameB byte +} + +type Font struct { + Info *FontInfo + Name string +} diff --git a/format.go b/format.go new file mode 100644 index 0000000..547d8e5 --- /dev/null +++ b/format.go @@ -0,0 +1,11 @@ +package xls + +type FormatB struct { + Index uint16 + Size uint16 +} + +type Format struct { + b *FormatB + str string +} diff --git a/row.go b/row.go new file mode 100644 index 0000000..72c0203 --- /dev/null +++ b/row.go @@ -0,0 +1,16 @@ +package xls + +type RowInfo struct { + Index uint16 + Fcell uint16 + Lcell uint16 + Height uint16 + Notused uint16 + Notused2 uint16 + Flags uint32 +} + +type Row struct { + info *RowInfo + Cols map[uint16]Coler +} diff --git a/sst.go b/sst.go new file mode 100644 index 0000000..3c92e39 --- /dev/null +++ b/sst.go @@ -0,0 +1,6 @@ +package xls + +type SstInfo struct { + Total uint32 + Count uint32 +} diff --git a/st_xf_data.go b/st_xf_data.go new file mode 100644 index 0000000..3d49921 --- /dev/null +++ b/st_xf_data.go @@ -0,0 +1,17 @@ +package xls + +// type st_xf_data struct { +// Font uint16 +// Format uint16 +// Type uint16 +// Align byte +// Rotation byte +// Ident byte +// Usedattr byte +// Linestyle uint32 +// Linecolor uint32 +// Groundcolor uint16 +// } + +type st_xf_data interface { +} diff --git a/workbook.go b/workbook.go new file mode 100644 index 0000000..30cab85 --- /dev/null +++ b/workbook.go @@ -0,0 +1,227 @@ +package xls + +import ( + "bytes" + "encoding/binary" + "io" + "log" + "unicode/utf16" +) + +type WorkBook struct { + Is5ver bool + Type uint16 + Codepage uint16 + Xfs []st_xf_data + Fonts []Font + Formats map[uint16]*Format + Sheets []*WorkSheet + Author string + rs io.ReadSeeker + sst []string +} + +func newWookBookFromOle2(rs io.ReadSeeker) *WorkBook { + wb := new(WorkBook) + wb.Formats = make(map[uint16]*Format) + // wb.bts = bts + wb.rs = rs + wb.Sheets = make([]*WorkSheet, 0) + wb.Parse(rs) + return wb +} + +func (w *WorkBook) Parse(buf io.ReadSeeker) { + bof := new(BOF) + var bof_pre *BOF + // buf := bytes.NewReader(bts) + offset := 0 + for { + if err := binary.Read(buf, binary.LittleEndian, bof); err == nil { + bof_pre, offset = w.parseBof(buf, bof, bof_pre, offset) + } else { + break + } + } +} + +func (w *WorkBook) addXf(xf st_xf_data) { + w.Xfs = append(w.Xfs, xf) +} + +func (w *WorkBook) addFont(font *FontInfo, buf io.ReadSeeker) { + name := w.get_string(buf, uint16(font.NameB)) + w.Fonts = append(w.Fonts, Font{Info: font, Name: name}) +} + +func (w *WorkBook) addFormat(format *FormatB, buf io.ReadSeeker) { + w.Formats[format.Index] = &Format{b: format, str: w.get_string(buf, uint16(format.Size))} +} + +func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *BOF, pre *BOF, offset_pre int) (after *BOF, offset int) { + after = b + var bts = make([]byte, b.Size) + binary.Read(buf, binary.LittleEndian, bts) + buf_item := bytes.NewReader(bts) + switch b.Id { + case 0x809: + bif := new(BIFFHeader) + binary.Read(buf_item, binary.LittleEndian, bif) + if bif.Ver != 0x600 { + wb.Is5ver = true + } + wb.Type = bif.Type + case 0x042: // CODEPAGE + binary.Read(buf_item, binary.LittleEndian, &wb.Codepage) + case 0x3c: // CONTINUE + var bts = make([]byte, b.Size) + binary.Read(buf_item, binary.LittleEndian, bts) + buf_item := bytes.NewReader(bts) + if pre.Id == 0xfc { + var size uint16 + if err := binary.Read(buf_item, binary.LittleEndian, &size); err == nil { + wb.sst[offset_pre] = wb.get_string(buf_item, size) + offset_pre++ + } + } + offset = offset_pre + after = pre + case 0xfc: // SST + info := new(SstInfo) + binary.Read(buf_item, binary.LittleEndian, info) + wb.sst = make([]string, info.Count) + for i := 0; i < int(info.Count); i++ { + var size uint16 + if err := binary.Read(buf_item, binary.LittleEndian, &size); err == nil || err == io.EOF { + wb.sst[i] = wb.sst[i] + wb.get_string(buf_item, size) + if err == io.EOF { + offset = i + break + } + } + } + case 0x85: // BOUNDSHEET + var bs = new(Boundsheet) + binary.Read(buf_item, binary.LittleEndian, bs) + // different for BIFF5 and BIFF8 + wb.addSheet(bs, buf_item) + case 0x0e0: // XF + if wb.Is5ver { + xf := new(Xf5) + binary.Read(buf_item, binary.LittleEndian, xf) + wb.addXf(xf) + } else { + xf := new(Xf8) + binary.Read(buf_item, binary.LittleEndian, xf) + wb.addXf(xf) + } + case 0x031: // FONT + f := new(FontInfo) + binary.Read(buf_item, binary.LittleEndian, f) + wb.addFont(f, buf_item) + case 0x41E: //FORMAT + // var bts = make([]byte, b.Size) + // binary.Read(buf, binary.LittleEndian, bts) + // buf_item := bytes.NewReader(bts) + f := new(FormatB) + binary.Read(buf_item, binary.LittleEndian, f) + wb.addFormat(f, buf_item) + // case 0x5c: + // var bts = make([]byte, b.Size) + // binary.Read(buf_item, binary.LittleEndian, bts) + // if wb.Is5ver { + // wb.Author = wb.get_string_from_bytes(bts[1:], uint16(bts[1])) + // } else { + // size := binary.LittleEndian.Uint16(bts) + // wb.Author = wb.get_string_from_bytes(bts[2:], size) + // } + } + return +} + +func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) string { + var res string + if w.Is5ver { + var bts = make([]byte, size) + buf.Read(bts) + return string(bts) + } else { + var richtext_num uint16 + var phonetic_size uint32 + var flag byte + binary.Read(buf, binary.LittleEndian, &flag) + log.Println(flag, size) + if flag&0x8 != 0 { + binary.Read(buf, binary.LittleEndian, &richtext_num) + } + if flag&0x4 != 0 { + binary.Read(buf, binary.LittleEndian, &phonetic_size) + } + if flag&0x1 != 0 { + var bts = make([]uint16, size) + binary.Read(buf, binary.LittleEndian, &bts) + runes := utf16.Decode(bts) + res = string(runes) + } else { + var bts = make([]byte, size) + binary.Read(buf, binary.LittleEndian, &bts) + res = string(bts) + } + if flag&0x8 != 0 { + var bts []byte + if w.Is5ver { + bts = make([]byte, 2*richtext_num) + } else { + bts = make([]byte, 4*richtext_num) + } + binary.Read(buf, binary.LittleEndian, bts) + } + if flag&0x4 != 0 { + var bts []byte + bts = make([]byte, phonetic_size) + binary.Read(buf, binary.LittleEndian, bts) + } + } + return res +} + +func (w *WorkBook) get_string_from_bytes(bts []byte, size uint16) string { + buf := bytes.NewReader(bts) + return w.get_string(buf, size) +} + +func (w *WorkBook) addSheet(sheet *Boundsheet, buf io.ReadSeeker) { + name := w.get_string(buf, uint16(sheet.Name)) + w.Sheets = append(w.Sheets, &WorkSheet{bs: sheet, Name: name, wb: w}) +} + +func (w *WorkBook) PrepareSheet(sheet *WorkSheet) { + w.rs.Seek(int64(sheet.bs.Filepos), 0) + sheet.Parse(w.rs) +} + +func (w *WorkBook) ReadAllCells() (res [][]string) { + for _, sheet := range w.Sheets { + w.PrepareSheet(sheet) + if sheet.MaxRow != 0 { + temp := make([][]string, sheet.MaxRow+1) + for k, row := range sheet.Rows { + data := make([]string, 0) + if len(row.Cols) > 0 { + for _, col := range row.Cols { + if uint16(len(data)) <= col.LastCol() { + data = append(data, make([]string, col.LastCol()-uint16(len(data))+1)...) + } + str := col.String(w) + for i := uint16(0); i < col.LastCol()-col.FirstCol()+1; i++ { + data[col.FirstCol()+i] = str[i] + } + } + temp[k] = data + } + } + res = append(res, temp...) + } + } + return +} diff --git a/worksheet.go b/worksheet.go new file mode 100644 index 0000000..065ab5c --- /dev/null +++ b/worksheet.go @@ -0,0 +1,114 @@ +package xls + +import ( + "encoding/binary" + "fmt" + "io" +) + +type Boundsheet struct { + Filepos uint32 + Type byte + Visible byte + Name byte +} + +type WorkSheet struct { + bs *Boundsheet + wb *WorkBook + Name string + Rows map[uint16]*Row + MaxRow uint16 +} + +func (w *WorkSheet) Parse(buf io.ReadSeeker) { + w.Rows = make(map[uint16]*Row) + bof := new(BOF) + var bof_pre *BOF + for { + if err := binary.Read(buf, binary.LittleEndian, bof); err == nil { + bof_pre = w.parseBof(buf, bof, bof_pre) + } else { + fmt.Println(err) + break + } + } +} + +func (w *WorkSheet) parseBof(buf io.ReadSeeker, bof *BOF, pre *BOF) *BOF { + var col Coler + switch bof.Id { + case 0x0E5: //MERGEDCELLS + // ws.mergedCells(buf) + case 0x208: //ROW + r := new(RowInfo) + binary.Read(buf, binary.LittleEndian, r) + w.addRow(r) + case 0x0BD: //MULRK + mc := new(MulrkCol) + size := (bof.Size - 6) / 6 + binary.Read(buf, binary.LittleEndian, &mc.Col) + mc.Xfrks = make([]XfRk, size) + for i := uint16(0); i < size; i++ { + binary.Read(buf, binary.LittleEndian, &mc.Xfrks[i]) + } + binary.Read(buf, binary.LittleEndian, &mc.LastColB) + col = mc + case 0x0BE: //MULBLANK + mc := new(MulBlankCol) + size := (bof.Size - 6) / 2 + binary.Read(buf, binary.LittleEndian, &mc.Col) + mc.Xfs = make([]uint16, size) + for i := uint16(0); i < size; i++ { + binary.Read(buf, binary.LittleEndian, &mc.Xfs[i]) + } + binary.Read(buf, binary.LittleEndian, &mc.LastColB) + col = mc + case 0x203: //NUMBER + col = new(NumberCol) + binary.Read(buf, binary.LittleEndian, col) + case 0x06: //FORMULA + col = new(FormulaCol) + binary.Read(buf, binary.LittleEndian, col) + case 0x27e: //RK + col = new(RkCol) + binary.Read(buf, binary.LittleEndian, col) + case 0xFD: //LABELSST + col = new(LabelsstCol) + binary.Read(buf, binary.LittleEndian, col) + case 0x201: //BLANK + col = new(BlankCol) + binary.Read(buf, binary.LittleEndian, col) + default: + buf.Seek(int64(bof.Size), 1) + } + if col != nil { + w.addCell(col) + } + return bof +} + +func (w *WorkSheet) addCell(col Coler) { + var row *Row + var ok bool + if row, ok = w.Rows[col.Row()]; !ok { + info := new(RowInfo) + info.Index = col.Row() + row = w.addRow(info) + } + row.Cols[col.FirstCol()] = col +} + +func (w *WorkSheet) addRow(info *RowInfo) (row *Row) { + if info.Index > w.MaxRow { + w.MaxRow = info.Index + } + var ok bool + if row, ok = w.Rows[info.Index]; ok { + row.info = info + } else { + row = &Row{info: info, Cols: make(map[uint16]Coler)} + w.Rows[info.Index] = row + } + return +} diff --git a/xf.go b/xf.go new file mode 100644 index 0000000..d5f57d3 --- /dev/null +++ b/xf.go @@ -0,0 +1,25 @@ +package xls + +type Xf5 struct { + Font uint16 + Format uint16 + Type uint16 + Align uint16 + Color uint16 + Fill uint16 + Border uint16 + Linestyle uint16 +} + +type Xf8 struct { + Font uint16 + Format uint16 + Type uint16 + Align byte + Rotation byte + Ident byte + Usedattr byte + Linestyle uint32 + Linecolor uint32 + Groundcolor uint16 +} diff --git a/xls.go b/xls.go new file mode 100644 index 0000000..68811db --- /dev/null +++ b/xls.go @@ -0,0 +1,45 @@ +package xls + +import ( + "github.com/extrame/ole2" + "io" + "io/ioutil" +) + +func Open(file string, charset string) (*WorkBook, error) { + + if bts, err := ioutil.ReadFile(file); err == nil { + return parse(bts, charset) + } else { + return nil, err + } + +} + +func OpenReader(reader io.ReadCloser, charset string) (*WorkBook, error) { + bts, _ := ioutil.ReadAll(reader) + return parse(bts, charset) +} + +func parse(bts []byte, charset string) (*WorkBook, error) { + ole, _ := ole2.Open(bts, charset) + dir, err := ole.ListDir() + var book *ole2.File + for _, file := range dir { + name := file.Name() + if name == "Workbook" { + book = file + // break + } + if name == "Book" { + book = file + // break + } + + } + if book != nil { + wb := newWookBookFromOle2(ole.OpenFile(book)) + return wb, nil + } + return nil, err +} diff --git a/xls_test.go b/xls_test.go new file mode 100644 index 0000000..1b6271c --- /dev/null +++ b/xls_test.go @@ -0,0 +1,20 @@ +package xls + +import ( + "bytes" + "fmt" + "testing" +) + +func TestOpen(t *testing.T) { + wb, _ := Open("n201503061328.xls", "utf-8") + fmt.Println(wb.ReadAllCells()) +} + +func TestBof(t *testing.T) { + bof := new(BOF) + bof.Id = 0x41E + bof.Size = 55 + buf := bytes.NewReader([]byte{0x07, 0x00, 0x19, 0x00, 0x01, 0x22, 0x00, 0xE5, 0xFF, 0x22, 0x00, 0x23, 0x00, 0x2C, 0x00, 0x23, 0x00, 0x23, 0x00, 0x30, 0x00, 0x2E, 0x00, 0x30, 0x00, 0x30, 0x00, 0x3B, 0x00, 0x22, 0x00, 0xE5, 0xFF, 0x22, 0x00, 0x5C, 0x00, 0x2D, 0x00, 0x23, 0x00, 0x2C, 0x20, 0x00}) + new(WorkBook).parseBof(buf, bof, bof, 0) +}