20
0

Add Formula & Format support

This commit is contained in:
chen.s.g 2018-04-05 21:28:01 +08:00
parent d1d6f84447
commit 574bf55ec4
11 changed files with 786 additions and 222 deletions

View File

@ -1,8 +1,9 @@
# xls
[![GoDoc](https://godoc.org/github.com/extrame/xls?status.svg)](https://godoc.org/github.com/extrame/xls)
[![GoDoc](https://godoc.org/github.com/csg800/xls?status.svg)](https://godoc.org/github.com/csg800/xls)
Pure Golang xls library written by [Rongshu Tech(chinese)](http://www.rongshu.tech).
Formula & Format support added by [chen.s.g](http://www.imohe.com).
Thanks for contributions from Tamás Gulácsi, sergeilem.

View File

@ -46,6 +46,10 @@ type HyperLink struct {
IsUrl bool
}
// Debug prints a Go-syntax dump of the hyperlink record to standard output.
// The wb argument is not used by this implementation.
func (h *HyperLink) Debug(wb *WorkBook) {
fmt.Printf("hyper link col dump:%#+v\n", h)
}
//get the hyperlink string, use the public variable Url to get the original Url
func (h *HyperLink) String(wb *WorkBook) []string {
res := make([]string, h.LastColB-h.FristColB+1)

317
col.go
View File

@ -1,17 +1,31 @@
package xls
import (
"errors"
"fmt"
"math"
"strconv"
"time"
"github.com/extrame/goyymmdd"
)
var ErrIsInt = errors.New("is int")
// Value categories assigned to formula results and number formats.
const (
	TYPE_STRING2    = 1
	TYPE_STRING     = 2 // formula whose result is a string
	TYPE_FORMULA    = 3
	TYPE_NUMERIC    = 4 // plain number
	TYPE_BOOL       = 5 // boolean formula result
	TYPE_NULL       = 6 // formula whose result is an empty string
	TYPE_INLINE     = 7
	TYPE_ERROR      = 8  // formula result is an error code
	TYPE_DATETIME   = 9  // number rendered with a date/time pattern
	TYPE_PERCENTAGE = 10 // number rendered as a percentage
	TYPE_CURRENCY   = 11 // number rendered with a currency prefix
)
//content type
type contentHandler interface {
Debug(wb *WorkBook)
String(*WorkBook) []string
FirstCol() uint16
LastCol() uint16
@ -26,6 +40,10 @@ type Coler interface {
Row() uint16
}
// Debug prints a Go-syntax dump of the column header to standard output.
// The wb argument is not used by this implementation.
func (c *Col) Debug(wb *WorkBook) {
fmt.Printf("col dump:%#+v\n", c)
}
// Row returns the row number stored in the RowB field.
func (c *Col) Row() uint16 {
return c.RowB
}
@ -42,39 +60,12 @@ func (c *Col) String(wb *WorkBook) []string {
return []string{"default"}
}
type XfRk struct {
Index uint16
Rk RK
}
func (xf *XfRk) String(wb *WorkBook) string {
idx := int(xf.Index)
if len(wb.Xfs) > idx {
fNo := wb.Xfs[idx].formatNo()
if fNo >= 164 { // user defined format
if formatter := wb.Formats[fNo]; formatter != nil {
i, f, isFloat := xf.Rk.number()
if !isFloat {
f = float64(i)
}
t := timeFromExcelTime(f, wb.dateMode == 1)
return yymmdd.Format(t, formatter.str)
}
// see http://www.openoffice.org/sc/excelfileformat.pdf Page #174
} else if 14 <= fNo && fNo <= 17 || fNo == 22 || 27 <= fNo && fNo <= 36 || 50 <= fNo && fNo <= 58 { // jp. date format
i, f, isFloat := xf.Rk.number()
if !isFloat {
f = float64(i)
}
t := timeFromExcelTime(f, wb.dateMode == 1)
return t.Format(time.RFC3339) //TODO it should be international
}
}
return xf.Rk.String()
}
type RK uint32
// Debug prints a Go-syntax dump of the raw RK value to standard output.
// The wb argument is not used by this implementation.
func (rk RK) Debug(wb *WorkBook) {
fmt.Printf("rk dump:%#+v\n", rk)
}
func (rk RK) number() (intNum int64, floatNum float64, isFloat bool) {
multiplied := rk & 1
isInt := rk & 2
@ -97,22 +88,40 @@ func (rk RK) number() (intNum int64, floatNum float64, isFloat bool) {
return int64(val), 0, false
}
func (rk RK) String() string {
// float returns the decoded RK value as a float64. When number reports an
// integer encoding, the integer is converted to float64.
func (rk RK) float() float64 {
	intVal, floatVal, isFloat := rk.number()
	if isFloat {
		return floatVal
	}
	return float64(intVal)
}
// String renders the decoded RK value in decimal: integers in base 10 and
// floats in their shortest exact 'f' representation. wb is not used.
func (rk RK) String(wb *WorkBook) string {
	intVal, floatVal, isFloat := rk.number()
	if !isFloat {
		return strconv.FormatInt(intVal, 10)
	}
	return strconv.FormatFloat(floatVal, 'f', -1, 64)
}
var ErrIsInt = fmt.Errorf("is int")
type XfRk struct {
Index uint16
Rk RK
}
func (rk RK) Float() (float64, error) {
_, f, isFloat := rk.number()
if !isFloat {
return 0, ErrIsInt
// Debug prints the XF record referenced by Index, followed by a dump of the
// RK value, to standard output. If Index does not address an entry of wb.Xfs
// a placeholder line is printed instead of indexing out of range.
func (xf *XfRk) Debug(wb *WorkBook) {
	if idx := int(xf.Index); idx < len(wb.Xfs) {
		fmt.Printf("xfrk dump:%#+v\n", wb.Xfs[idx])
	} else {
		fmt.Printf("xfrk dump:<xf index %d out of range>\n", idx)
	}
	xf.Rk.Debug(wb)
}
// String renders the RK value using the number format attached to the cell's
// XF record. If the workbook has no usable format for Index, the plain
// decimal rendering of the RK value is returned instead.
func (xf *XfRk) String(wb *WorkBook) string {
	if val, ok := wb.Format(xf.Index, xf.Rk.float()); ok {
		return val
	}
	return xf.Rk.String(wb)
}
type MulrkCol struct {
@ -121,16 +130,24 @@ type MulrkCol struct {
LastColB uint16
}
// Debug prints a Go-syntax dump of the MULRK record to standard output and
// then calls Debug on every XF/RK pair it holds.
func (c *MulrkCol) Debug(wb *WorkBook) {
fmt.Printf("mulrk dump:%#+v\n", c)
for _, v := range c.Xfrks {
v.Debug(wb)
}
}
// LastCol returns the value of the LastColB field.
func (c *MulrkCol) LastCol() uint16 {
return c.LastColB
}
func (c *MulrkCol) String(wb *WorkBook) []string {
var res = make([]string, len(c.Xfrks))
for i := 0; i < len(c.Xfrks); i++ {
xfrk := c.Xfrks[i]
res[i] = xfrk.String(wb)
for i, v := range c.Xfrks {
res[i] = v.String(wb)
}
return res
}
@ -140,6 +157,10 @@ type MulBlankCol struct {
LastColB uint16
}
// Debug prints a Go-syntax dump of the MULBLANK record to standard output.
// The wb argument is not used by this implementation.
func (c *MulBlankCol) Debug(wb *WorkBook) {
fmt.Printf("mul blank dump:%#+v\n", c)
}
// LastCol returns the value of the LastColB field.
func (c *MulBlankCol) LastCol() uint16 {
return c.LastColB
}
@ -154,23 +175,209 @@ type NumberCol struct {
Float float64
}
// Debug prints a Go-syntax dump of the NUMBER record to standard output.
// The wb argument is not used by this implementation.
func (c *NumberCol) Debug(wb *WorkBook) {
fmt.Printf("number col dump:%#+v\n", c)
}
// String returns the cell's number as a single-element slice. The workbook's
// format for the cell's XF index is applied when one is available; otherwise
// the shortest exact decimal form of the float is used.
func (c *NumberCol) String(wb *WorkBook) []string {
	text, formatted := wb.Format(c.Index, c.Float)
	if !formatted {
		text = strconv.FormatFloat(c.Float, 'f', -1, 64)
	}
	return []string{text}
}
type FormulaCol struct {
Header struct {
type FormulaColHeader struct {
Col
IndexXf uint16
Result [8]byte
Flags uint16
_ uint32
}
// Value decodes the eight Result bytes as a little-endian IEEE 754
// double-precision number and returns it.
//
// The bytes are assembled into a uint64 (Result[0] is the least significant
// byte) and reinterpreted with math.Float64frombits, so the full range of
// exponents, subnormals, infinities and NaN is handled by the runtime rather
// than by manual mantissa/exponent arithmetic.
func (f *FormulaColHeader) Value() float64 {
	var bits uint64
	for i := len(f.Result) - 1; i >= 0; i-- {
		bits = bits<<8 | uint64(f.Result[i])
	}
	return math.Float64frombits(bits)
}
// IsPart reports whether the record is flagged as part of a shared formula,
// i.e. whether bit 0x0008 of the option flags that follow the result field is
// set.
//
// WARNING: the flag alone is apparently not reliable. Even when it is set the
// formula data may be ordinary formula data, so callers that need certainty
// must additionally check for the tExp token (0x01) in the token array.
func (f *FormulaColHeader) IsPart() bool {
	// The flag lives in the Flags word, not in the trailing bytes of Result
	// (which belong to the cached value and are 0xFFFF for every non-numeric
	// result).
	return f.Flags&0x0008 != 0
}
// FormulaCol is a formula cell together with its lazily computed display value.
type FormulaCol struct {
// parsed is set once parse has run; String calls parse while it is false.
parsed bool
// NOTE(review): Code, Btl and Btc are not read by the code in this file
// section; their meaning should be confirmed against the record reader.
Code uint16
Btl uint16
Btc uint16
// Bts holds raw formula bytes inspected by isRef, isGetCurTime and parse.
Bts []byte
// Header is the fixed-size record header holding the cached result.
Header *FormulaColHeader
// ws is the sheet index used when a reference carries no sheet of its own.
ws int
// vType is one of the TYPE_* constants, assigned by parse.
vType int
// value is the text returned by String.
value string
}
// Debug prints a Go-syntax dump of the formula cell to standard output.
// The wb argument is not used by this implementation.
func (c *FormulaCol) Debug(wb *WorkBook) {
fmt.Printf("formula col dump:%#+v\n", c)
}
// Row returns the row number stored in the header's embedded Col.
func (c *FormulaCol) Row() uint16 {
return c.Header.Col.RowB
}
// FirstCol returns the column number stored in the header's embedded Col.
func (c *FormulaCol) FirstCol() uint16 {
return c.Header.Col.FirstColB
}
// LastCol returns the same column as FirstCol: it reads FirstColB, so a
// formula record is reported as spanning exactly one column.
func (c *FormulaCol) LastCol() uint16 {
return c.Header.Col.FirstColB
}
// String returns the formula's display value as a single-element slice. The
// value is computed on first use by parse (following cell references) and
// cached in c.value for later calls.
func (c *FormulaCol) String(wb *WorkBook) []string {
	if !c.parsed {
		c.parse(wb, true)
	}
	return []string{c.value}
}
// parse derives the display value (c.value) and kind (c.vType) of the formula
// from the cached result in the record header.
//
// Non-numeric results are marked by 0xFF in the last two result bytes; the
// first result byte then selects the kind (0 string, 1 boolean, 2 error,
// 3 empty string) and the byte at offset 2 carries the boolean or error code.
// Every other result is treated as a number.
//
// For numeric results, a "current time" formula is rendered from time.Now, a
// single-cell reference is resolved by reading the referenced cell, and all
// other values are formatted through the workbook's number formats.
//
// When ref is false, references are not followed and the cell is left marked
// as unparsed so that a later call can resolve it.
func (c *FormulaCol) parse(wb *WorkBook, ref bool) {
	c.parsed = true
	res := c.Header.Result

	if res[6] == 0xFF && res[7] == 0xFF {
		switch res[0] {
		case 0:
			// String formula. Result follows in an appended STRING record.
			c.vType = TYPE_STRING
			return
		case 1:
			// Boolean formula. The value is at offset 2; 0=false, 1=true.
			c.vType = TYPE_BOOL
			if res[2] == 0 {
				c.value = "false"
			} else {
				c.value = "true"
			}
			return
		case 2:
			// Error formula. The error code is at offset 2. Unknown codes
			// leave c.value untouched.
			c.vType = TYPE_ERROR
			switch res[2] {
			case 0x00:
				c.value = "#NULL!"
			case 0x07:
				c.value = "#DIV/0!"
			case 0x0F:
				c.value = "#VALUE!"
			case 0x17:
				c.value = "#REF!"
			case 0x1D:
				c.value = "#NAME?"
			case 0x24:
				c.value = "#NUM!"
			case 0x2A:
				c.value = "#N/A"
			}
			return
		case 3:
			// Formula result is an empty string.
			c.vType = TYPE_NULL
			c.value = ""
			return
		}
		// Any other marker byte falls through to the numeric handling below.
	}

	// Formula result is a number, laid out like a NUMBER record.
	c.vType = TYPE_NUMERIC
	switch {
	case c.isGetCurTime():
		// If the cell's date/time format is not supported, use time.RFC3339.
		var ok bool
		if c.value, ok = wb.Format(c.Header.IndexXf, 0); !ok {
			c.value = parseTime(0, time.RFC3339)
		}

	case c.isRef():
		if !ref {
			c.parsed = false
			return
		}
		// Assume the reference is broken until the target cell is found.
		c.value = "#REF!"

		sheet := -1
		var rIdx, cIdx uint16
		if c.Bts[0] == 0x07 {
			// Reference that names a sheet through the external-sheet table.
			if len(c.Bts) < 9 {
				return
			}
			exi := int(ByteToUint16(c.Bts[3:5]))
			rIdx = ByteToUint16(c.Bts[5:7])
			cIdx = 0x00FF & ByteToUint16(c.Bts[7:9])
			if exi < len(wb.ref.Info) {
				sheet = int(wb.ref.Info[exi].FirstSheetIndex)
			}
		} else {
			// Reference within the formula's own sheet.
			if len(c.Bts) < 7 {
				return
			}
			sheet = c.ws
			rIdx = ByteToUint16(c.Bts[3:5])
			cIdx = 0x00FF & ByteToUint16(c.Bts[5:7])
		}

		// sheet stays -1 when the external-sheet index was out of range, so
		// both bounds have to be checked before asking for the sheet.
		if sheet >= 0 && sheet < len(wb.sheets) {
			if row := wb.GetSheet(sheet).Row(int(rIdx)); row != nil {
				c.value = row.Col(int(cIdx))
			}
		}

	default:
		num := c.Header.Value()
		var ok bool
		if c.value, ok = wb.Format(c.Header.IndexXf, num); !ok {
			c.value = strconv.FormatFloat(num, 'f', -1, 64)
		}
	}
}
// isRef reports whether the formula consists of a single reference to another
// cell. It returns false, rather than indexing out of range, when Bts holds
// fewer than three bytes.
//
// NOTE(review): Bts[0] is presumably the low byte of the token-array length
// and Bts[2] the first token (5-byte same-sheet reference vs. 7-byte
// reference carrying a sheet index) - confirm against the record reader.
func (c *FormulaCol) isRef() bool {
	if len(c.Bts) < 3 {
		return false
	}
	switch token := c.Bts[2]; c.Bts[0] {
	case 0x05:
		return token == 0x24 || token == 0x44 || token == 0x64
	case 0x07:
		return token == 0x3A || token == 0x5A || token == 0x7A
	}
	return false
}
// isGetCurTime reports whether the formula asks for the current date or
// date-time. It looks at the byte following a 0x21/0x41/0x61 token, found
// either at Bts[2] or, when Bts[2] is 0x19, at Bts[6], and returns true when
// that byte is 0x4A or 0xDD. Formulas too short to hold those bytes yield
// false instead of indexing out of range.
//
// NOTE(review): 0x4A and 0xDD are presumably the function indexes of NOW and
// TODAY - confirm against the BIFF function table.
func (c *FormulaCol) isGetCurTime() bool {
	isFuncToken := func(b byte) bool {
		return b == 0x21 || b == 0x41 || b == 0x61
	}

	var next byte
	n := len(c.Bts)
	switch {
	case n > 7 && c.Bts[2] == 0x19 && isFuncToken(c.Bts[6]):
		next = c.Bts[7]
	case n > 3 && isFuncToken(c.Bts[2]):
		next = c.Bts[3]
	}
	return next == 0x4A || next == 0xDD
}
type RkCol struct {
@ -178,6 +385,10 @@ type RkCol struct {
Xfrk XfRk
}
// Debug prints a Go-syntax dump of the RK record to standard output.
// The wb argument is not used by this implementation.
func (c *RkCol) Debug(wb *WorkBook) {
fmt.Printf("rk col dump:%#+v\n", c)
}
// String returns the formatted value of the cell's XF/RK pair as a
// single-element slice.
func (c *RkCol) String(wb *WorkBook) []string {
return []string{c.Xfrk.String(wb)}
}
@ -188,6 +399,10 @@ type LabelsstCol struct {
Sst uint32
}
// Debug prints a Go-syntax dump of the LABELSST record to standard output.
// The wb argument is not used by this implementation.
func (c *LabelsstCol) Debug(wb *WorkBook) {
fmt.Printf("label sst col dump:%#+v\n", c)
}
// String returns the shared-string-table entry addressed by Sst as a
// single-element slice. An index outside the table yields an empty string
// rather than an out-of-range panic, so a damaged record does not abort the
// whole read.
func (c *LabelsstCol) String(wb *WorkBook) []string {
	if idx := int(c.Sst); idx >= 0 && idx < len(wb.sst) {
		return []string{wb.sst[idx]}
	}
	return []string{""}
}
@ -197,6 +412,10 @@ type labelCol struct {
Str string
}
// Debug prints a Go-syntax dump of the LABEL record to standard output.
// The wb argument is not used by this implementation.
func (c *labelCol) Debug(wb *WorkBook) {
fmt.Printf("label col dump:%#+v\n", c)
}
// String returns the label text as a single-element slice; wb is not used.
func (c *labelCol) String(wb *WorkBook) []string {
return []string{c.Str}
}
@ -206,6 +425,10 @@ type BlankCol struct {
Xf uint16
}
// Debug prints a Go-syntax dump of the BLANK record to standard output.
// The wb argument is not used by this implementation.
func (c *BlankCol) Debug(wb *WorkBook) {
fmt.Printf("blank col dump:%#+v\n", c)
}
// String returns a single empty string; wb is not used.
func (c *BlankCol) String(wb *WorkBook) []string {
return []string{""}
}

79
date.go
View File

@ -5,37 +5,57 @@ import (
"time"
)
const MJD_0 float64 = 2400000.5
const MJD_JD2000 float64 = 51544.5
// timeLocationUTC is the UTC location used by timeToUTCTime.
var timeLocationUTC, _ = time.LoadLocation("UTC")

// timeToUTCTime returns a time with the same calendar date and wall-clock
// reading as t, but labelled as UTC. The instant is not converted: the
// original zone offset is simply dropped.
func timeToUTCTime(t time.Time) time.Time {
	year, month, day := t.Date()
	hour, minute, second := t.Clock()
	return time.Date(year, month, day, hour, minute, second, t.Nanosecond(), timeLocationUTC)
}
// timeToExcelTime converts t to a day-based serial number: the nanoseconds
// since the Unix epoch are expressed in days and offset by 25569.
func timeToExcelTime(t time.Time) float64 {
	const (
		nanosPerDay     = 8.64e13
		unixEpochSerial = 25569.0
	)
	days := float64(t.UnixNano()) / nanosPerDay
	return days + unixEpochSerial
}
// shiftJulianToNoon moves the day boundary of a Julian date split into whole
// days and a day fraction by half a day. Fractions strictly between -0.5 and
// 0.5 gain half a day; fractions of 0.5 or more roll over into the next day;
// fractions of -0.5 or less borrow from the previous day. The day count
// changes by exactly one in the roll-over and borrow cases.
func shiftJulianToNoon(julianDays, julianFraction float64) (float64, float64) {
	switch {
	case -0.5 < julianFraction && julianFraction < 0.5:
		julianFraction += 0.5
	case julianFraction >= 0.5:
		julianDays++
		julianFraction -= 0.5
	case julianFraction <= -0.5:
		julianDays--
		julianFraction += 1.5
	}
	return julianDays, julianFraction
}
// fractionOfADay splits a fraction of a day into hours, minutes, seconds and
// nanoseconds. The input is first rounded to the nearest microsecond, so the
// returned nanoseconds are always a multiple of 1000.
func fractionOfADay(fraction float64) (hours, minutes, seconds, nanoseconds int) {
	const (
		nanosPerMicro = 1e3
		nanosPerSec   = 1e9
		nanosPerDay   = 24 * 60 * 60 * nanosPerSec
	)
	// Work in integer nanoseconds from here on so that the remainders below
	// are exact; adding half a microsecond makes the truncation round.
	total := int64(nanosPerDay*fraction + nanosPerMicro/2)
	nanoseconds = int((total%nanosPerSec)/nanosPerMicro) * nanosPerMicro
	total /= nanosPerSec
	seconds = int(total % 60)
	total /= 60
	minutes = int(total % 60)
	hours = int(total / 60)
	return hours, minutes, seconds, nanoseconds
}
// julianDateToGregorianTime provides function to convert julian date to
// gregorian time.
func julianDateToGregorianTime(part1, part2 float64) time.Time {
part1I, part1F := math.Modf(part1)
part2I, part2F := math.Modf(part2)
@ -47,13 +67,12 @@ func julianDateToGregorianTime(part1, part2 float64) time.Time {
return time.Date(year, time.Month(month), day, hours, minutes, seconds, nanoseconds, time.UTC)
}
// By this point generations of programmers have repeated the
// algorithm sent to the editor of "Communications of the ACM" in 1968
// (published in CACM, volume 11, number 10, October 1968, p.657).
// None of those programmers seems to have found it necessary to
// explain the constants or variable names set out by Henry F. Fliegel
// and Thomas C. Van Flandern. Maybe one day I'll buy that journal and
// expand an explanation here - that day is not today.
// By this point generations of programmers have repeated the algorithm sent to
// the editor of "Communications of the ACM" in 1968 (published in CACM, volume
// 11, number 10, October 1968, p.657). None of those programmers seems to have
// found it necessary to explain the constants or variable names set out by
// Henry F. Fliegel and Thomas C. Van Flandern. Maybe one day I'll buy that
// journal and expand an explanation here - that day is not today.
func doTheFliegelAndVanFlandernAlgorithm(jd int) (day, month, year int) {
l := jd + 68569
n := (4 * l) / 146097
@ -68,24 +87,26 @@ func doTheFliegelAndVanFlandernAlgorithm(jd int) (day, month, year int) {
return d, m, y
}
// Convert an excelTime representation (stored as a floating point number) to a time.Time.
// timeFromExcelTime provides function to convert an excelTime representation
// (stored as a floating point number) to a time.Time.
func timeFromExcelTime(excelTime float64, date1904 bool) time.Time {
var date time.Time
var intPart int64 = int64(excelTime)
// Excel uses Julian dates prior to March 1st 1900, and
// Gregorian thereafter.
var intPart = int64(excelTime)
// Excel uses Julian dates prior to March 1st 1900, and Gregorian
// thereafter.
if intPart <= 61 {
const OFFSET1900 = 15018.0
const OFFSET1904 = 16480.0
const MJD0 float64 = 2400000.5
var date time.Time
if date1904 {
date = julianDateToGregorianTime(MJD_0+OFFSET1904, excelTime)
date = julianDateToGregorianTime(MJD0, excelTime+OFFSET1904)
} else {
date = julianDateToGregorianTime(MJD_0+OFFSET1900, excelTime)
date = julianDateToGregorianTime(MJD0, excelTime+OFFSET1900)
}
return date
}
var floatPart float64 = excelTime - float64(intPart)
var floatPart = excelTime - float64(intPart)
var dayNanoSeconds float64 = 24 * 60 * 60 * 1000 * 1000 * 1000
if date1904 {
date = time.Date(1904, 1, 1, 0, 0, 0, 0, time.UTC)

View File

@ -2,6 +2,7 @@ package xls
import (
"fmt"
"testing"
)
func ExampleOpen() {
@ -35,3 +36,23 @@ func ExampleWorkBook_GetSheet() {
}
}
}
// BenchmarkGetSheet measures opening Table.xls with Open and loading every
// sheet. The benchmark fails if the fixture cannot be opened, so a missing or
// unreadable file is reported instead of producing a meaningless timing.
func BenchmarkGetSheet(b *testing.B) {
	for i := 0; i < b.N; i++ {
		xlFile, err := Open("Table.xls", "utf-8")
		if err != nil {
			b.Fatalf("open Table.xls: %v", err)
		}
		for s := 0; s < xlFile.NumSheets(); s++ {
			xlFile.GetSheet(s)
		}
	}
}
// BenchmarkGetSheetWithBuffer measures opening Table.xls with OpenWithBuffer
// and loading every sheet. The benchmark fails if the fixture cannot be
// opened, so a missing or unreadable file is reported instead of producing a
// meaningless timing.
func BenchmarkGetSheetWithBuffer(b *testing.B) {
	for i := 0; i < b.N; i++ {
		xlFile, err := OpenWithBuffer("Table.xls", "utf-8")
		if err != nil {
			b.Fatalf("open Table.xls: %v", err)
		}
		for s := 0; s < xlFile.NumSheets(); s++ {
			xlFile.GetSheet(s)
		}
	}
}

221
format.go
View File

@ -1,9 +1,228 @@
package xls
import (
"regexp"
"strconv"
"strings"
"time"
)
// builtInNumFmt maps built-in number format ids to their patterns. Excel
// styles can reference number formats that are built in, all of which have an
// id less than 164. This is a possibly incomplete list comprised of as many
// of them as could be found.
//
// NOTE(review): entry 58 holds Go's time.RFC3339 layout rather than an Excel
// pattern. It starts with a digit, so the date/time detection in
// Format.Prepare (which requires a leading h, m, s, d or y) does not classify
// it as a date format - confirm the intended handling of id 58.
var builtInNumFmt = map[uint16]string{
	0:  "general",
	1:  "0",
	2:  "0.00",
	3:  "#,##0",
	4:  "#,##0.00",
	9:  "0%",
	10: "0.00%",
	11: "0.00e+00",
	12: "# ?/?",
	13: "# ??/??",
	14: "mm-dd-yy",
	15: "d-mmm-yy",
	16: "d-mmm",
	17: "mmm-yy",
	18: "h:mm am/pm",
	19: "h:mm:ss am/pm",
	20: "h:mm",
	21: "h:mm:ss",
	22: "m/d/yy h:mm",
	37: "#,##0 ;(#,##0)",
	38: "#,##0 ;[red](#,##0)",
	39: "#,##0.00;(#,##0.00)",
	40: "#,##0.00;[red](#,##0.00)",
	41: `_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)`,
	42: `_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_)`,
	43: `_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)`,
	44: `_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)`,
	45: "mm:ss",
	46: "[h]:mm:ss",
	47: "mmss.0",
	48: "##0.0e+0",
	49: "@",
	58: time.RFC3339,
}
// dateTimeMapper lists the Excel date/time placeholders and the Go layout
// fragments that replace them. parseTime walks the list in order and replaces
// only the first occurrence of each placeholder, so the order of the entries
// matters: longer placeholders come before their prefixes.
var dateTimeMapper = []struct{ xls, golang string }{
{"yyyy", "2006"},
{"yy", "06"},
// Full month and weekday names are parked behind temporary markers and
// expanded by the last two entries, after all other replacements ran.
{"mmmm", "%%%%"},
{"dddd", "&&&&"},
{"dd", "02"},
{"d", "2"},
{"mmm", "Jan"},
{"mmss", "0405"},
{"ss", "05"},
// "mm" next to a colon is taken as minutes; a remaining "mm" as the month.
{"mm:", "04:"},
{":mm", ":04"},
{"mm", "01"},
{"am/pm", "pm"},
{"m/", "1/"},
{"%%%%", "January"},
{"&&&&", "Monday"},
}
// Format describes one number format of a workbook and renders float values
// according to it.
type Format struct {
// Head is the fixed-size part read from the FORMAT record: Index is the
// key under which the format is stored in WorkBook.Formats and Size is
// the length passed to the string reader for the pattern text.
Head struct {
Index uint16
Size uint16
}
// NOTE(review): str is not referenced by the code in this file section.
str string
// Raw holds the pattern split on ";"; Prepare normalises it in place.
Raw []string
// bts is the number of decimals used when rendering numbers, derived by
// Prepare from the text after the "." in Raw[0].
bts int
// vType is one of the TYPE_* constants; 0 means not yet classified.
vType int
}
// Patterns and replacer used by Format.Prepare, built once at start-up.
var (
	// formatColorRe matches a leading colour tag such as "[red]".
	formatColorRe = regexp.MustCompile("^\\[[a-zA-Z]+\\]")
	// formatHashRe matches runs of optional-digit placeholders ("#", "#,##").
	formatHashRe = regexp.MustCompile("#\\,?#*")
	// formatDateTimeRe matches patterns that start, after optional locale
	// tags such as "[$-409]", with a date/time placeholder letter.
	formatDateTimeRe = regexp.MustCompile("^(\\[\\$[A-Z]*-[0-9A-F]*\\])*[hmsdy]")
	// formatStripper removes spacing ("_"), escapes ("\"), fill markers
	// ("*"), quotes and parentheses, none of which affect classification.
	formatStripper = strings.NewReplacer("_", "", "\\", "", "*", "", "\"", "", "(", "", ")", "")
)

// Prepare normalises the sections in f.Raw and derives the value category
// (f.vType) and the number of decimals (f.bts) used by String.
//
// Each section is stripped of layout-only characters, colour tags and "#"
// placeholders, then trimmed. While f.vType is still 0 the first section that
// looks like a date/time, percentage or currency pattern decides the
// category; if none does, the format is numeric.
//
// For numeric, currency and percentage formats the decimals are the run of
// "0"/"?" placeholders directly after the "." in the first section, so
// trailing "%" or exponent text is not counted. The "general" and "@"
// patterns get a precision of -1, which makes String print the shortest exact
// representation instead of truncating to an integer. A format without any
// section is left with zero decimals.
func (f *Format) Prepare() {
	for k, v := range f.Raw {
		v = formatStripper.Replace(v)
		v = formatColorRe.ReplaceAllString(v, "")
		v = formatHashRe.ReplaceAllString(v, "")

		if f.vType == 0 {
			switch {
			case formatDateTimeRe.MatchString(v):
				f.vType = TYPE_DATETIME
			case strings.HasSuffix(v, "%"):
				f.vType = TYPE_PERCENTAGE
			case strings.HasPrefix(v, "$") || strings.HasPrefix(v, "¥"):
				f.vType = TYPE_CURRENCY
			}
		}

		f.Raw[k] = strings.Trim(v, "\r\n\t ")
	}

	if f.vType == 0 {
		f.vType = TYPE_NUMERIC
	}

	isNumber := f.vType == TYPE_NUMERIC || f.vType == TYPE_CURRENCY || f.vType == TYPE_PERCENTAGE
	if !isNumber || len(f.Raw) == 0 {
		return
	}

	first := f.Raw[0]
	if lower := strings.ToLower(first); lower == "general" || lower == "@" {
		f.bts = -1
		return
	}
	if dot := strings.IndexByte(first, '.'); dot >= 0 {
		decimals := 0
		for _, r := range first[dot+1:] {
			if r != '0' && r != '?' {
				break
			}
			decimals++
		}
		f.bts = decimals
	}
}
// String renders v according to the prepared format.
//
// Numeric and currency values are printed with f.bts decimals, percentages
// are scaled by 100 first and get a "%" suffix, and date/time values are
// rendered through parseTime with the first pattern section. Values are
// rounded to the requested number of decimals rather than truncated, so 0.25
// with a "0%" format yields "25%". Any other category falls back to the
// shortest exact decimal representation.
//
// see http://www.openoffice.org/sc/excelfileformat.pdf Page #174
func (f *Format) String(v float64) string {
	switch f.vType {
	case TYPE_NUMERIC, TYPE_CURRENCY:
		return strconv.FormatFloat(v, 'f', f.bts, 64)
	case TYPE_PERCENTAGE:
		return strconv.FormatFloat(v*100, 'f', f.bts, 64) + "%"
	case TYPE_DATETIME:
		return parseTime(v, f.Raw[0])
	default:
		return strconv.FormatFloat(v, 'f', -1, 64)
	}
}
// ByteToUint32 decodes the first four bytes of b as a little-endian unsigned
// 32-bit integer. It panics if b holds fewer than four bytes.
func ByteToUint32(b []byte) uint32 {
	low := uint32(b[0]) | uint32(b[1])<<8
	high := uint32(b[2]) | uint32(b[3])<<8
	return high<<16 | low
}
// ByteToUint16 decodes the first two bytes of b as a little-endian unsigned
// 16-bit integer. It panics if b holds fewer than two bytes.
func ByteToUint16(b []byte) uint16 {
	low := uint16(b[0])
	high := uint16(b[1])
	return high<<8 | low
}
// parseTime renders the Excel serial date v using the Excel date/time pattern
// f and returns the resulting text.
//
// The Excel placeholders in f are replaced with Go time layout placeholders;
// for example, yyyy becomes 2006. The replacements happen in a specific
// order, because m is used for month, minute and am/pm alike. It would be
// easier to fix that with regular expressions, but keeping this simple makes
// it easier to maintain. Full-length month and day names (e.g. March,
// Tuesday) contain letters that later replacements would touch (such as the
// 'h' in March or the 'd' in Tuesday), so they are first converted to
// arbitrary characters unused in Excel date formats and turned into their
// final form at the end.
//
// Based off: http://www.ozgrid.com/Excel/CustomFormats.htm
func parseTime(v float64, f string) string {
var val time.Time
// A serial of exactly 0 is rendered as the current time instead of being
// converted.
// NOTE(review): the conversion always passes date1904=false, so a
// workbook's date mode is not taken into account here - confirm intended.
if 0 == v {
val = time.Now()
} else {
val = timeFromExcelTime(v, false)
}
// It is the presence of the "am/pm" indicator that determines if this is
// a 12 hour or 24 hours time format, not the number of 'h' characters.
if is12HourTime(f) {
f = strings.Replace(f, "hh", "03", 1)
f = strings.Replace(f, "h", "3", 1)
} else {
f = strings.Replace(f, "hh", "15", 1)
f = strings.Replace(f, "h", "15", 1)
}
// Apply the remaining placeholders in table order, first occurrence only.
for _, repl := range dateTimeMapper {
f = strings.Replace(f, repl.xls, repl.golang, 1)
}
// If the hour is optional, strip it out, along with the possible dangling
// colon that would remain.
if val.Hour() < 1 {
f = strings.Replace(f, "]:", "]", 1)
f = strings.Replace(f, "[03]", "", 1)
f = strings.Replace(f, "[3]", "", 1)
f = strings.Replace(f, "[15]", "", 1)
} else {
f = strings.Replace(f, "[3]", "3", 1)
f = strings.Replace(f, "[15]", "15", 1)
}
return val.Format(f)
}
// is12HourTime reports whether an Excel time format uses a 12-hour clock,
// which is the case when it contains one of the am/pm or a/p markers in
// all-lower or all-upper case.
func is12HourTime(format string) bool {
	for _, marker := range [...]string{"am/pm", "AM/PM", "a/p", "A/P"} {
		if strings.Contains(format, marker) {
			return true
		}
	}
	return false
}

35
row.go
View File

@ -2,8 +2,8 @@ package xls
type rowInfo struct {
Index uint16
Fcell uint16
Lcell uint16
First uint16
Last uint16
Height uint16
Notused uint16
Notused2 uint16
@ -20,27 +20,30 @@ type Row struct {
//Col Get the Nth Col from the Row, if has not, return nil.
//Suggest use Has function to test it.
func (r *Row) Col(i int) string {
serial := uint16(i)
var val string
var serial = uint16(i)
if ch, ok := r.cols[serial]; ok {
strs := ch.String(r.wb)
return strs[0]
val = ch.String(r.wb)[0]
} else {
for _, v := range r.cols {
if v.FirstCol() <= serial && v.LastCol() >= serial {
strs := v.String(r.wb)
return strs[serial-v.FirstCol()]
}
}
}
return ""
}
val = v.String(r.wb)[serial-v.FirstCol()]
//LastCol Get the number of Last Col of the Row.
func (r *Row) LastCol() int {
return int(r.info.Lcell)
break
}
}
}
return val
}
//FirstCol Get the number of First Col of the Row.
func (r *Row) FirstCol() int {
return int(r.info.Fcell)
return int(r.info.First)
}
// LastCol returns the Last field of the row's info record as an int.
func (r *Row) LastCol() int {
return int(r.info.Last)
}

View File

@ -4,7 +4,7 @@ import (
"bytes"
"encoding/binary"
"io"
"os"
"strings"
"unicode/utf16"
)
@ -13,14 +13,14 @@ type WorkBook struct {
Is5ver bool
Type uint16
Codepage uint16
Xfs []st_xf_data
Xfs []XF
Fonts []Font
Formats map[uint16]*Format
//All the sheets from the workbook
sheets []*WorkSheet
Author string
rs io.ReadSeeker
sst []string
ref *extSheetRef
continue_utf16 uint16
continue_rich uint16
continue_apsb uint32
@ -29,62 +29,50 @@ type WorkBook struct {
//read workbook from ole2 file
func newWorkBookFromOle2(rs io.ReadSeeker) *WorkBook {
wb := new(WorkBook)
wb.Formats = make(map[uint16]*Format)
// wb.bts = bts
wb.rs = rs
wb.sheets = make([]*WorkSheet, 0)
wb.Parse(rs)
var wb = &WorkBook{
rs: rs,
ref: new(extSheetRef),
sheets: make([]*WorkSheet, 0),
Formats: make(map[uint16]*Format),
}
wb.parse(rs)
wb.prepare()
return wb
}
func (w *WorkBook) Parse(buf io.ReadSeeker) {
func (w *WorkBook) parse(buf io.ReadSeeker) {
b := new(bof)
bof_pre := new(bof)
// buf := bytes.NewReader(bts)
bp := new(bof)
offset := 0
for {
if err := binary.Read(buf, binary.LittleEndian, b); err == nil {
bof_pre, b, offset = w.parseBof(buf, b, bof_pre, offset)
bp, b, offset = w.parseBof(buf, b, bp, offset)
} else {
break
}
}
}
func (w *WorkBook) addXf(xf st_xf_data) {
w.Xfs = append(w.Xfs, xf)
}
func (w *WorkBook) addFont(font *FontInfo, buf io.ReadSeeker) {
name, _ := w.get_string(buf, uint16(font.NameB))
w.Fonts = append(w.Fonts, Font{Info: font, Name: name})
}
func (w *WorkBook) addFormat(format *Format) {
if w.Formats == nil {
os.Exit(1)
}
w.Formats[format.Head.Index] = format
}
func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int) (after *bof, after_using *bof, offset int) {
after = b
after_using = pre
var bts = make([]byte, b.Size)
binary.Read(buf, binary.LittleEndian, bts)
buf_item := bytes.NewReader(bts)
item := bytes.NewReader(bts)
switch b.Id {
case 0x809:
case 0x0809: // BOF
bif := new(biffHeader)
binary.Read(buf_item, binary.LittleEndian, bif)
binary.Read(item, binary.LittleEndian, bif)
if bif.Ver != 0x600 {
wb.Is5ver = true
}
wb.Type = bif.Type
case 0x042: // CODEPAGE
binary.Read(buf_item, binary.LittleEndian, &wb.Codepage)
case 0x3c: // CONTINUE
case 0x0042: // CODEPAGE
binary.Read(item, binary.LittleEndian, &wb.Codepage)
case 0x3C: // CONTINUE
if pre.Id == 0xfc {
var size uint16
var err error
@ -92,12 +80,12 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int
size = wb.continue_utf16
wb.continue_utf16 = 0
} else {
err = binary.Read(buf_item, binary.LittleEndian, &size)
err = binary.Read(item, binary.LittleEndian, &size)
}
for err == nil && offset_pre < len(wb.sst) {
var str string
if size > 0 {
str, err = wb.get_string(buf_item, size)
str, err = wb.parseString(item, size)
wb.sst[offset_pre] = wb.sst[offset_pre] + str
}
@ -106,23 +94,23 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int
}
offset_pre++
err = binary.Read(buf_item, binary.LittleEndian, &size)
err = binary.Read(item, binary.LittleEndian, &size)
}
}
offset = offset_pre
after = pre
after_using = b
case 0xfc: // SST
case 0x00FC: // SST
info := new(SstInfo)
binary.Read(buf_item, binary.LittleEndian, info)
binary.Read(item, binary.LittleEndian, info)
wb.sst = make([]string, info.Count)
var size uint16
var i = 0
for ; i < int(info.Count); i++ {
var err error
if err = binary.Read(buf_item, binary.LittleEndian, &size); err == nil {
if err = binary.Read(item, binary.LittleEndian, &size); err == nil {
var str string
str, err = wb.get_string(buf_item, size)
str, err = wb.parseString(item, size)
wb.sst[i] = wb.sst[i] + str
}
@ -131,41 +119,90 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int
}
}
offset = i
case 0x85: // bOUNDSHEET
case 0x0085: // SHEET
var bs = new(boundsheet)
binary.Read(buf_item, binary.LittleEndian, bs)
binary.Read(item, binary.LittleEndian, bs)
// different for BIFF5 and BIFF8
wb.addSheet(bs, buf_item)
case 0x0e0: // XF
wb.addSheet(bs, item)
case 0x0017: // EXTERNSHEET
if !wb.Is5ver {
binary.Read(item, binary.LittleEndian, &wb.ref.Num)
wb.ref.Info = make([]ExtSheetInfo, wb.ref.Num)
binary.Read(item, binary.LittleEndian, &wb.ref.Info)
}
case 0x00e0: // XF
if wb.Is5ver {
xf := new(Xf5)
binary.Read(buf_item, binary.LittleEndian, xf)
binary.Read(item, binary.LittleEndian, xf)
wb.addXf(xf)
} else {
xf := new(Xf8)
binary.Read(buf_item, binary.LittleEndian, xf)
binary.Read(item, binary.LittleEndian, xf)
wb.addXf(xf)
}
case 0x031: // FONT
case 0x0031: // FONT
f := new(FontInfo)
binary.Read(buf_item, binary.LittleEndian, f)
wb.addFont(f, buf_item)
case 0x41E: //FORMAT
font := new(Format)
binary.Read(buf_item, binary.LittleEndian, &font.Head)
font.str, _ = wb.get_string(buf_item, font.Head.Size)
wb.addFormat(font)
case 0x22: //DATEMODE
binary.Read(buf_item, binary.LittleEndian, &wb.dateMode)
binary.Read(item, binary.LittleEndian, f)
wb.addFont(f, item)
case 0x041E: //FORMAT
format := new(Format)
binary.Read(item, binary.LittleEndian, &format.Head)
if raw, err := wb.parseString(item, format.Head.Size); nil == err && "" != raw {
format.Raw = strings.Split(raw, ";")
} else {
format.Raw = []string{}
}
wb.addFormat(format)
case 0x0022: //DATEMODE
binary.Read(item, binary.LittleEndian, &wb.dateMode)
}
return
}
func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err error) {
// addXf appends xf to the workbook's list of XF records, so its position in
// Xfs follows the order in which the records are added.
func (w *WorkBook) addXf(xf XF) {
w.Xfs = append(w.Xfs, xf)
}
// addFont reads the font name from buf, using font.NameB as the length, and
// appends the font to the workbook's font list.
// NOTE(review): an error from parseString is discarded, so a truncated record
// yields a font with whatever name could be read.
func (w *WorkBook) addFont(font *FontInfo, buf io.ReadSeeker) {
name, _ := w.parseString(buf, uint16(font.NameB))
w.Fonts = append(w.Fonts, Font{Info: font, Name: name})
}
// addFormat stores format in the Formats map under its Head.Index, replacing
// any format already registered for that index. The map must be non-nil.
func (w *WorkBook) addFormat(format *Format) {
w.Formats[format.Head.Index] = format
}
// addSheet reads the sheet name from buf, using sheet.Name as the length, and
// appends a new WorkSheet whose id is its position in the sheet list.
// NOTE(review): an error from parseString is discarded.
func (w *WorkBook) addSheet(sheet *boundsheet, buf io.ReadSeeker) {
name, _ := w.parseString(buf, uint16(sheet.Name))
w.sheets = append(w.sheets, &WorkSheet{id: len(w.sheets), bs: sheet, Name: name, wb: w})
}
// prepare completes the workbook's format table: every built-in number format
// that is not already registered is added, and then all formats - built-in
// and file-defined alike - are prepared for rendering.
func (w *WorkBook) prepare() {
	for idx, pattern := range builtInNumFmt {
		if _, exists := w.Formats[idx]; exists {
			continue
		}
		w.Formats[idx] = &Format{Raw: strings.Split(pattern, ";")}
	}

	for idx := range w.Formats {
		w.Formats[idx].Prepare()
	}
}
// prepareSheet loads a sheet into memory: it seeks the workbook's reader to
// the sheet's recorded file position and parses the sheet from there. It must
// be called before anything is read from the sheet.
// NOTE(review): the error returned by Seek is ignored; if seeking fails the
// sheet is parsed from wherever the reader currently is.
func (w *WorkBook) prepareSheet(sheet *WorkSheet) {
w.rs.Seek(int64(sheet.bs.Filepos), 0)
sheet.parse(w.rs)
}
func (w *WorkBook) parseString(buf io.ReadSeeker, size uint16) (res string, err error) {
if w.Is5ver {
var bts = make([]byte, size)
_, err = buf.Read(bts)
res = string(bts)
res = string(bytes.Trim(bts, "\r\n\t "))
} else {
var richtext_num = uint16(0)
var phonetic_size = uint32(0)
@ -190,7 +227,7 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e
err = binary.Read(buf, binary.LittleEndian, &bts[i])
}
runes := utf16.Decode(bts[:i])
res = string(runes)
res = strings.Trim(string(runes), "\r\n\t ")
if i < size {
w.continue_utf16 = size - i + 1
}
@ -208,23 +245,21 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e
bts1[k] = uint16(v)
}
runes := utf16.Decode(bts1)
res = string(runes)
res = strings.Trim(string(runes), "\r\n\t ")
}
if richtext_num > 0 {
var bts []byte
var seek_size int64
var ss int64
if w.Is5ver {
seek_size = int64(2 * richtext_num)
ss = int64(2 * richtext_num)
} else {
seek_size = int64(4 * richtext_num)
ss = int64(4 * richtext_num)
}
bts = make([]byte, seek_size)
bts = make([]byte, ss)
err = binary.Read(buf, binary.LittleEndian, bts)
if err == io.EOF {
w.continue_rich = richtext_num
}
// err = binary.Read(buf, binary.LittleEndian, bts)
}
if phonetic_size > 0 {
var bts []byte
@ -238,18 +273,20 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e
return
}
func (w *WorkBook) addSheet(sheet *boundsheet, buf io.ReadSeeker) {
name, _ := w.get_string(buf, uint16(sheet.Name))
w.sheets = append(w.sheets, &WorkSheet{bs: sheet, Name: name, wb: w})
// Format renders v with the number format attached to the XF record at index
// xf. The boolean result is false - and the string empty - when xf is outside
// the XF list or no format is registered for the record's format number.
func (w *WorkBook) Format(xf uint16, v float64) (string, bool) {
	idx := int(xf)
	if idx >= len(w.Xfs) {
		return "", false
	}
	formatter := w.Formats[w.Xfs[idx].FormatNo()]
	if formatter == nil {
		return "", false
	}
	return formatter.String(v), true
}
//reading a sheet from the compress file to memory, you should call this before you try to get anything from sheet
func (w *WorkBook) prepareSheet(sheet *WorkSheet) {
w.rs.Seek(int64(sheet.bs.Filepos), 0)
sheet.parse(w.rs)
}
//Get one sheet by its number
//GetSheet get one sheet by its number
func (w *WorkBook) GetSheet(num int) *WorkSheet {
if num < len(w.sheets) {
s := w.sheets[num]
@ -257,9 +294,8 @@ func (w *WorkBook) GetSheet(num int) *WorkSheet {
w.prepareSheet(s)
}
return s
} else {
return nil
}
return nil
}
//Get the number of all sheets, look into example
@ -267,9 +303,9 @@ func (w *WorkBook) NumSheets() int {
return len(w.sheets)
}
//helper function to read all cells from file
//ReadAllCells helper function to read all cells from file
//Notice: the max value is the limit of the max capacity of lines.
//Warning: the helper function will need big memeory if file is large.
//Warning: the helper function will need big memory if file is large.
func (w *WorkBook) ReadAllCells(max int) (res [][]string) {
res = make([][]string, 0)
for _, sheet := range w.sheets {
@ -277,11 +313,11 @@ func (w *WorkBook) ReadAllCells(max int) (res [][]string) {
max = max - len(res)
w.prepareSheet(sheet)
if sheet.MaxRow != 0 {
leng := int(sheet.MaxRow) + 1
if max < leng {
leng = max
length := int(sheet.MaxRow) + 1
if max < length {
length = max
}
temp := make([][]string, leng)
temp := make([][]string, length)
for k, row := range sheet.rows {
data := make([]string, 0)
if len(row.cols) > 0 {
@ -295,7 +331,7 @@ func (w *WorkBook) ReadAllCells(max int) (res [][]string) {
data[col.FirstCol()+i] = str[i]
}
}
if leng > int(k) {
if length > int(k) {
temp[k] = data
}
}

View File

@ -2,7 +2,6 @@ package xls
import (
"encoding/binary"
"fmt"
"io"
"unicode/utf16"
)
@ -14,6 +13,18 @@ type boundsheet struct {
Name byte
}
// extSheetRef groups a list of external sheet references with a count field.
// NOTE(review): Num presumably holds the number of entries in Info — confirm against the code that fills this struct.
type extSheetRef struct {
Num uint16
Info []ExtSheetInfo
}
// ExtSheetInfo describes one external sheet reference provided for named cells.
type ExtSheetInfo struct {
// NOTE(review): presumably the index of the referenced external workbook — confirm against the reader.
ExternalBookIndex uint16
// NOTE(review): FirstSheetIndex and LastSheetIndex presumably bound the referenced sheet range — confirm.
FirstSheetIndex uint16
LastSheetIndex uint16
}
//WorkSheet in one WorkBook
type WorkSheet struct {
bs *boundsheet
@ -22,6 +33,7 @@ type WorkSheet struct {
rows map[uint16]*Row
//NOTICE: this is the highest row index in the sheet, so it equals the row count minus 1
MaxRow uint16
id int
parsed bool
}
@ -36,15 +48,14 @@ func (w *WorkSheet) Row(i int) *Row {
func (w *WorkSheet) parse(buf io.ReadSeeker) {
w.rows = make(map[uint16]*Row)
b := new(bof)
var bof_pre *bof
var bp *bof
for {
if err := binary.Read(buf, binary.LittleEndian, b); err == nil {
bof_pre = w.parseBof(buf, b, bof_pre)
bp = w.parseBof(buf, b, bp)
if b.Id == 0xa {
break
}
} else {
fmt.Println(err)
break
}
}
@ -63,21 +74,17 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, b *bof, pre *bof) *bof {
case 0x0BD: //MULRK
mc := new(MulrkCol)
size := (b.Size - 6) / 6
binary.Read(buf, binary.LittleEndian, &mc.Col)
mc.Xfrks = make([]XfRk, size)
for i := uint16(0); i < size; i++ {
binary.Read(buf, binary.LittleEndian, &mc.Xfrks[i])
}
binary.Read(buf, binary.LittleEndian, &mc.Col)
binary.Read(buf, binary.LittleEndian, &mc.Xfrks)
binary.Read(buf, binary.LittleEndian, &mc.LastColB)
col = mc
case 0x0BE: //MULBLANK
mc := new(MulBlankCol)
size := (b.Size - 6) / 2
binary.Read(buf, binary.LittleEndian, &mc.Col)
mc.Xfs = make([]uint16, size)
for i := uint16(0); i < size; i++ {
binary.Read(buf, binary.LittleEndian, &mc.Xfs[i])
}
binary.Read(buf, binary.LittleEndian, &mc.Col)
binary.Read(buf, binary.LittleEndian, &mc.Xfs)
binary.Read(buf, binary.LittleEndian, &mc.LastColB)
col = mc
case 0x203: //NUMBER
@ -85,10 +92,26 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, b *bof, pre *bof) *bof {
binary.Read(buf, binary.LittleEndian, col)
case 0x06: //FORMULA
c := new(FormulaCol)
binary.Read(buf, binary.LittleEndian, &c.Header)
c.ws = w.id
c.Header = new(FormulaColHeader)
c.Bts = make([]byte, b.Size-20)
binary.Read(buf, binary.LittleEndian, c.Header)
binary.Read(buf, binary.LittleEndian, &c.Bts)
col = c
c.parse(w.wb, false)
if TYPE_STRING == c.vType {
binary.Read(buf, binary.LittleEndian, &c.Code)
binary.Read(buf, binary.LittleEndian, &c.Btl)
binary.Read(buf, binary.LittleEndian, &c.Btc)
var fms, fme = w.wb.parseString(buf, c.Btc)
if nil == fme {
c.value = fms
}
buf.Seek(-int64(c.Btl+4), 1)
}
case 0x27e: //RK
col = new(RkCol)
binary.Read(buf, binary.LittleEndian, col)
@ -100,18 +123,18 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, b *bof, pre *bof) *bof {
binary.Read(buf, binary.LittleEndian, &c.BlankCol)
var count uint16
binary.Read(buf, binary.LittleEndian, &count)
c.Str, _ = w.wb.get_string(buf, count)
c.Str, _ = w.wb.parseString(buf, count)
col = c
case 0x201: //BLANK
col = new(BlankCol)
binary.Read(buf, binary.LittleEndian, col)
case 0x1b8: //HYPERLINK
var flag uint32
var count uint32
var hy HyperLink
binary.Read(buf, binary.LittleEndian, &hy.CellRange)
buf.Seek(20, 1)
var flag uint32
binary.Read(buf, binary.LittleEndian, &flag)
var count uint32
if flag&0x14 != 0 {
binary.Read(buf, binary.LittleEndian, &count)
@ -172,7 +195,6 @@ func (w *WorkSheet) add(content interface{}) {
w.addCell(col, ch)
}
}
}
func (w *WorkSheet) addCell(col Coler, ch contentHandler) {
@ -180,33 +202,36 @@ func (w *WorkSheet) addCell(col Coler, ch contentHandler) {
}
// addRange registers ch on every row covered by rang, from rang.FirstRow()
// through rang.LastRow() inclusive. Nothing is added when FirstRow is greater
// than LastRow.
func (w *WorkSheet) addRange(rang Ranger, ch contentHandler) {
	// Count in int rather than uint16: with LastRow at 65535 a uint16 counter
	// wraps back to 0 after the final row, so "i <= LastRow" can never become
	// false and the loop would run forever.
	first, last := int(rang.FirstRow()), int(rang.LastRow())
	for i := first; i <= last; i++ {
		w.addContent(uint16(i), ch)
	}
}
func (w *WorkSheet) addContent(row_num uint16, ch contentHandler) {
func (w *WorkSheet) addContent(num uint16, ch contentHandler) {
var row *Row
var ok bool
if row, ok = w.rows[row_num]; !ok {
if row, ok = w.rows[num]; !ok {
info := new(rowInfo)
info.Index = row_num
info.Index = num
row = w.addRow(info)
}
row.cols[ch.FirstCol()] = ch
}
func (w *WorkSheet) addRow(info *rowInfo) (row *Row) {
func (w *WorkSheet) addRow(info *rowInfo) *Row {
var ok bool
var row *Row
if info.Index > w.MaxRow {
w.MaxRow = info.Index
}
var ok bool
if row, ok = w.rows[info.Index]; ok {
row.info = info
} else {
row = &Row{info: info, cols: make(map[uint16]contentHandler)}
row = &Row{info: info, cols: make(map[uint16]contentHandler, int(info.Last-info.First))}
w.rows[info.Index] = row
}
return
return row
}

12
xf.go
View File

@ -1,5 +1,9 @@
package xls
// XF is the common interface of the Xf5 and Xf8 records: anything that can
// report its format number.
type XF interface {
// FormatNo returns the record's format number (the Format field for Xf5 and Xf8).
FormatNo() uint16
}
type Xf5 struct {
Font uint16
Format uint16
@ -11,7 +15,7 @@ type Xf5 struct {
Linestyle uint16
}
func (x *Xf5) formatNo() uint16 {
func (x *Xf5) FormatNo() uint16 {
return x.Format
}
@ -28,10 +32,6 @@ type Xf8 struct {
Groundcolor uint16
}
func (x *Xf8) formatNo() uint16 {
func (x *Xf8) FormatNo() uint16 {
return x.Format
}
// st_xf_data is the interface of XF records that report their format number
// through the unexported formatNo method.
type st_xf_data interface {
// formatNo returns the record's format number.
formatNo() uint16
}

15
xls.go
View File

@ -1,7 +1,9 @@
package xls
import (
"bytes"
"io"
"io/ioutil"
"os"
"github.com/extrame/ole2"
@ -16,7 +18,16 @@ func Open(file string, charset string) (*WorkBook, error) {
}
}
//Open one xls file and return the closer
//OpenWithBuffer reads the whole xls file into memory and opens the workbook
//from that in-memory copy; charset is passed through to OpenReader.
//It returns a nil workbook and the read error when the file cannot be read.
func OpenWithBuffer(file string, charset string) (*WorkBook, error) {
	data, err := ioutil.ReadFile(file)
	if err != nil {
		return nil, err
	}
	return OpenReader(bytes.NewReader(data), charset)
}
//OpenWithCloser open one xls file and return the closer
func OpenWithCloser(file string, charset string) (*WorkBook, io.Closer, error) {
if fi, err := os.Open(file); err == nil {
wb, err := OpenReader(fi, charset)
@ -26,7 +37,7 @@ func OpenWithCloser(file string, charset string) (*WorkBook, io.Closer, error) {
}
}
//Open xls file from reader
//OpenReader open xls file from reader
func OpenReader(reader io.ReadSeeker, charset string) (wb *WorkBook, err error) {
var ole *ole2.Ole
if ole, err = ole2.Open(reader, charset); err == nil {