diff --git a/README.md b/README.md index 099dbbb..fc3d229 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ # xls -[![GoDoc](https://godoc.org/github.com/extrame/xls?status.svg)](https://godoc.org/github.com/extrame/xls) +[![GoDoc](https://godoc.org/github.com/csg800/xls?status.svg)](https://godoc.org/github.com/csg800/xls) Pure Golang xls library writen by [Rongshu Tech(chinese)](http://www.rongshu.tech). +Add Formula & Format support by [chen.s.g] (http://www.imohe.com) Thanks for contributions from Tamás Gulácsi, sergeilem. diff --git a/cell_range.go b/cell_range.go index 2dde04e..ec30526 100644 --- a/cell_range.go +++ b/cell_range.go @@ -46,6 +46,10 @@ type HyperLink struct { IsUrl bool } +func (h *HyperLink) Debug(wb *WorkBook) { + fmt.Printf("hyper link col dump:%#+v\n", h) +} + //get the hyperlink string, use the public variable Url to get the original Url func (h *HyperLink) String(wb *WorkBook) []string { res := make([]string, h.LastColB-h.FristColB+1) diff --git a/col.go b/col.go index 371af91..4ed2e20 100644 --- a/col.go +++ b/col.go @@ -1,17 +1,31 @@ package xls import ( + "errors" "fmt" "math" "strconv" - "time" - - "github.com/extrame/goyymmdd" ) +var ErrIsInt = errors.New("is int") + +/* Data types */ +const TYPE_STRING2 = 1 +const TYPE_STRING = 2 +const TYPE_FORMULA = 3 +const TYPE_NUMERIC = 4 +const TYPE_BOOL = 5 +const TYPE_NULL = 6 +const TYPE_INLINE = 7 +const TYPE_ERROR = 8 +const TYPE_DATETIME = 9 +const TYPE_PERCENTAGE = 10 +const TYPE_CURRENCY = 11 + //content type type contentHandler interface { + Debug(wb *WorkBook) String(*WorkBook) []string FirstCol() uint16 LastCol() uint16 @@ -26,6 +40,10 @@ type Coler interface { Row() uint16 } +func (c *Col) Debug(wb *WorkBook) { + fmt.Printf("col dump:%#+v\n", c) +} + func (c *Col) Row() uint16 { return c.RowB } @@ -42,39 +60,12 @@ func (c *Col) String(wb *WorkBook) []string { return []string{"default"} } -type XfRk struct { - Index uint16 - Rk RK -} - -func (xf *XfRk) String(wb *WorkBook) string { - idx := int(xf.Index) - if len(wb.Xfs) > idx { - fNo := wb.Xfs[idx].formatNo() - if fNo >= 164 { // user defined format - if formatter := wb.Formats[fNo]; formatter != nil { - i, f, isFloat := xf.Rk.number() - if !isFloat { - f = float64(i) - } - t := timeFromExcelTime(f, wb.dateMode == 1) - return yymmdd.Format(t, formatter.str) - } - // see http://www.openoffice.org/sc/excelfileformat.pdf Page #174 - } else if 14 <= fNo && fNo <= 17 || fNo == 22 || 27 <= fNo && fNo <= 36 || 50 <= fNo && fNo <= 58 { // jp. date format - i, f, isFloat := xf.Rk.number() - if !isFloat { - f = float64(i) - } - t := timeFromExcelTime(f, wb.dateMode == 1) - return t.Format(time.RFC3339) //TODO it should be international - } - } - return xf.Rk.String() -} - type RK uint32 +func (rk RK) Debug(wb *WorkBook) { + fmt.Printf("rk dump:%#+v\n", rk) +} + func (rk RK) number() (intNum int64, floatNum float64, isFloat bool) { multiplied := rk & 1 isInt := rk & 2 @@ -97,22 +88,40 @@ func (rk RK) number() (intNum int64, floatNum float64, isFloat bool) { return int64(val), 0, false } -func (rk RK) String() string { +func (rk RK) float() float64 { + var i, f, isFloat = rk.number() + if !isFloat { + f = float64(i) + } + + return f +} + +func (rk RK) String(wb *WorkBook) string { i, f, isFloat := rk.number() if isFloat { return strconv.FormatFloat(f, 'f', -1, 64) } + return strconv.FormatInt(i, 10) } -var ErrIsInt = fmt.Errorf("is int") +type XfRk struct { + Index uint16 + Rk RK +} -func (rk RK) Float() (float64, error) { - _, f, isFloat := rk.number() - if !isFloat { - return 0, ErrIsInt +func (xf *XfRk) Debug(wb *WorkBook) { + fmt.Printf("xfrk dump:%#+v\n", wb.Xfs[xf.Index]) + xf.Rk.Debug(wb) +} + +func (xf *XfRk) String(wb *WorkBook) string { + if val, ok := wb.Format(xf.Index, xf.Rk.float()); ok { + return val } - return f, nil + + return xf.Rk.String(wb) } type MulrkCol struct { @@ -121,16 +130,24 @@ type MulrkCol struct { LastColB uint16 } +func (c *MulrkCol) Debug(wb *WorkBook) { + fmt.Printf("mulrk dump:%#+v\n", c) + + for _, v := range c.Xfrks { + v.Debug(wb) + } +} + func (c *MulrkCol) LastCol() uint16 { return c.LastColB } func (c *MulrkCol) String(wb *WorkBook) []string { var res = make([]string, len(c.Xfrks)) - for i := 0; i < len(c.Xfrks); i++ { - xfrk := c.Xfrks[i] - res[i] = xfrk.String(wb) + for i, v := range c.Xfrks { + res[i] = v.String(wb) } + return res } @@ -140,6 +157,10 @@ type MulBlankCol struct { LastColB uint16 } +func (c *MulBlankCol) Debug(wb *WorkBook) { + fmt.Printf("mul blank dump:%#+v\n", c) +} + func (c *MulBlankCol) LastCol() uint16 { return c.LastColB } @@ -154,23 +175,209 @@ type NumberCol struct { Float float64 } +func (c *NumberCol) Debug(wb *WorkBook) { + fmt.Printf("number col dump:%#+v\n", c) +} + func (c *NumberCol) String(wb *WorkBook) []string { + if v, ok := wb.Format(c.Index, c.Float); ok { + return []string{v} + } + return []string{strconv.FormatFloat(c.Float, 'f', -1, 64)} } -type FormulaCol struct { - Header struct { - Col - IndexXf uint16 - Result [8]byte - Flags uint16 - _ uint32 +type FormulaColHeader struct { + Col + IndexXf uint16 + Result [8]byte + Flags uint16 + _ uint32 +} + +// Value formula header value +func (f *FormulaColHeader) Value() float64 { + var rknumhigh = ByteToUint32(f.Result[4:8]) + var rknumlow = ByteToUint32(f.Result[0:4]) + var sign = (rknumhigh & 0x80000000) >> 31 + var exp = ((rknumhigh & 0x7ff00000) >> 20) - 1023 + var mantissa = (0x100000 | (rknumhigh & 0x000fffff)) + var mantissalow1 = (rknumlow & 0x80000000) >> 31 + var mantissalow2 = (rknumlow & 0x7fffffff) + var value = float64(mantissa) / math.Pow(2, float64(20-exp)) + + if mantissalow1 != 0 { + value += 1 / math.Pow(2, float64(21-exp)) } - Bts []byte + + value += float64(mantissalow2) / math.Pow(2, float64(52-exp)) + if 0 != sign { + value *= -1 + } + + return value +} + +// IsPart part of shared formula check +// WARNING: +// We can apparently not rely on $isPartOfSharedFormula. Even when $isPartOfSharedFormula = true +// the formula data may be ordinary formula data, therefore we need to check +// explicitly for the tExp token (0x01) +func (f *FormulaColHeader) IsPart() bool { + return 0 != (0x0008 & ByteToUint16(f.Result[6:8])) +} + +type FormulaCol struct { + parsed bool + Code uint16 + Btl uint16 + Btc uint16 + Bts []byte + Header *FormulaColHeader + ws int + vType int + value string +} + +func (c *FormulaCol) Debug(wb *WorkBook) { + fmt.Printf("formula col dump:%#+v\n", c) +} + +func (c *FormulaCol) Row() uint16 { + return c.Header.Col.RowB +} + +func (c *FormulaCol) FirstCol() uint16 { + return c.Header.Col.FirstColB +} + +func (c *FormulaCol) LastCol() uint16 { + return c.Header.Col.FirstColB } func (c *FormulaCol) String(wb *WorkBook) []string { - return []string{"FormulaCol"} + if !c.parsed { + c.parse(wb, true) + } + + return []string{c.value} +} + +func (c *FormulaCol) parse(wb *WorkBook, ref bool) { + c.parsed = true + + if 0 == c.Header.Result[0] && 255 == c.Header.Result[6] && 255 == c.Header.Result[7] { + // String formula. Result follows in appended STRING record + c.vType = TYPE_STRING + } else if 1 == c.Header.Result[0] && 255 == c.Header.Result[6] && 255 == c.Header.Result[7] { + // Boolean formula. Result is in +2; 0=false, 1=true + c.vType = TYPE_BOOL + if 0 == c.Header.Result[3] { + c.value = "false" + } else { + c.value = "true" + } + } else if 2 == c.Header.Result[0] && 255 == c.Header.Result[6] && 255 == c.Header.Result[7] { + // Error formula. Error code is in +2 + c.vType = TYPE_ERROR + switch c.Header.Result[3] { + case 0x00: + c.value = "#NULL!" + case 0x07: + c.value = "#DIV/0" + case 0x0F: + c.value = "#VALUE!" + case 0x17: + c.value = "#REF!" + case 0x1D: + c.value = "#NAME?" + case 0x24: + c.value = "#NUM!" + case 0x2A: + c.value = "#N/A" + } + } else if 3 == c.Header.Result[0] && 255 == c.Header.Result[6] && 255 == c.Header.Result[7] { + // Formula result is a null string + c.vType = TYPE_NULL + c.value = "" + } else { + // formula result is a number, first 14 bytes like _NUMBER record + c.vType = TYPE_NUMERIC + + var flag bool + if c.isGetCurTime() { + // if date time format is not support, use time.RFC3339 + if c.value, flag = wb.Format(c.Header.IndexXf, 0); !flag { + c.value = parseTime(0, time.RFC3339) + } + } else if c.isRef() { + if ref { + var ws = -1 + var find bool + var rIdx uint16 + var cIdx uint16 + + if 0x07 == c.Bts[0] { + var exi = ByteToUint16(c.Bts[3:5]) + rIdx = ByteToUint16(c.Bts[5:7]) + cIdx = 0x00FF & ByteToUint16(c.Bts[7:9]) + if exi <= wb.ref.Num { + ws = int(wb.ref.Info[int(exi)].FirstSheetIndex) + } + } else { + ws = c.ws + rIdx = ByteToUint16(c.Bts[3:5]) + cIdx = 0x00FF & ByteToUint16(c.Bts[5:7]) + } + + if ws < len(wb.sheets) { + if row := wb.GetSheet(ws).Row(int(rIdx)); nil != row { + find = true + c.value = row.Col(int(cIdx)) + } + } + if !find { + c.value = "#REF!" + } + } else { + c.parsed = false + } + } else { + c.value, flag = wb.Format(c.Header.IndexXf, c.Header.Value()) + if !flag { + c.value = strconv.FormatFloat(c.Header.Value(), 'f', -1, 64) + } + } + } +} + +// isRef return cell is reference to other cell +func (c *FormulaCol) isRef() bool { + if 0x05 == c.Bts[0] && (0x24 == c.Bts[2] || 0x44 == c.Bts[2] || 0x64 == c.Bts[2]) { + return true + } else if 0x07 == c.Bts[0] && (0x3A == c.Bts[2] || 0x5A == c.Bts[2] || 0x7A == c.Bts[2]) { + return true + } + + return false +} + +// isGetCurTime return cell value is get current date or datetime flag +func (c *FormulaCol) isGetCurTime() bool { + var ret bool + var next byte + + if 0x19 == c.Bts[2] && (0x21 == c.Bts[6] || 0x41 == c.Bts[6] || 0x61 == c.Bts[6]) { + next = c.Bts[7] + } else if 0x21 == c.Bts[2] || 0x41 == c.Bts[2] || 0x61 == c.Bts[2] { + next = c.Bts[3] + } + + if 0x4A == next || 0xDD == next { + ret = true + } + + return ret } type RkCol struct { @@ -178,6 +385,10 @@ type RkCol struct { Xfrk XfRk } +func (c *RkCol) Debug(wb *WorkBook) { + fmt.Printf("rk col dump:%#+v\n", c) +} + func (c *RkCol) String(wb *WorkBook) []string { return []string{c.Xfrk.String(wb)} } @@ -188,6 +399,10 @@ type LabelsstCol struct { Sst uint32 } +func (c *LabelsstCol) Debug(wb *WorkBook) { + fmt.Printf("label sst col dump:%#+v\n", c) +} + func (c *LabelsstCol) String(wb *WorkBook) []string { return []string{wb.sst[int(c.Sst)]} } @@ -197,6 +412,10 @@ type labelCol struct { Str string } +func (c *labelCol) Debug(wb *WorkBook) { + fmt.Printf("label col dump:%#+v\n", c) +} + func (c *labelCol) String(wb *WorkBook) []string { return []string{c.Str} } @@ -206,6 +425,10 @@ type BlankCol struct { Xf uint16 } +func (c *BlankCol) Debug(wb *WorkBook) { + fmt.Printf("blank col dump:%#+v\n", c) +} + func (c *BlankCol) String(wb *WorkBook) []string { return []string{""} } diff --git a/date.go b/date.go index b7d2d04..b6e6455 100644 --- a/date.go +++ b/date.go @@ -5,37 +5,57 @@ import ( "time" ) -const MJD_0 float64 = 2400000.5 -const MJD_JD2000 float64 = 51544.5 +// timeLocationUTC defined the UTC time location. +var timeLocationUTC, _ = time.LoadLocation("UTC") +// timeToUTCTime provides function to convert time to UTC time. +func timeToUTCTime(t time.Time) time.Time { + return time.Date(t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), timeLocationUTC) +} + +// timeToExcelTime provides function to convert time to Excel time. +func timeToExcelTime(t time.Time) float64 { + return float64(t.UnixNano())/8.64e13 + 25569.0 +} + +// shiftJulianToNoon provides function to process julian date to noon. func shiftJulianToNoon(julianDays, julianFraction float64) (float64, float64) { switch { case -0.5 < julianFraction && julianFraction < 0.5: julianFraction += 0.5 case julianFraction >= 0.5: - julianDays += 1 + julianDays++ julianFraction -= 0.5 case julianFraction <= -0.5: - julianDays -= 1 + julianDays-- julianFraction += 1.5 } return julianDays, julianFraction } -// Return the integer values for hour, minutes, seconds and -// nanoseconds that comprised a given fraction of a day. +// fractionOfADay provides function to return the integer values for hour, +// minutes, seconds and nanoseconds that comprised a given fraction of a day. +// values would round to 1 us. func fractionOfADay(fraction float64) (hours, minutes, seconds, nanoseconds int) { - f := 5184000000000000 * fraction - nanoseconds = int(math.Mod(f, 1000000000)) - f = f / 1000000000 - seconds = int(math.Mod(f, 60)) - f = f / 3600 - minutes = int(math.Mod(f, 60)) - f = f / 60 - hours = int(f) - return hours, minutes, seconds, nanoseconds + + const ( + c1us = 1e3 + c1s = 1e9 + c1day = 24 * 60 * 60 * c1s + ) + + frac := int64(c1day*fraction + c1us/2) + nanoseconds = int((frac%c1s)/c1us) * c1us + frac /= c1s + seconds = int(frac % 60) + frac /= 60 + minutes = int(frac % 60) + hours = int(frac / 60) + return } +// julianDateToGregorianTime provides function to convert julian date to +// gregorian time. func julianDateToGregorianTime(part1, part2 float64) time.Time { part1I, part1F := math.Modf(part1) part2I, part2F := math.Modf(part2) @@ -47,13 +67,12 @@ func julianDateToGregorianTime(part1, part2 float64) time.Time { return time.Date(year, time.Month(month), day, hours, minutes, seconds, nanoseconds, time.UTC) } -// By this point generations of programmers have repeated the -// algorithm sent to the editor of "Communications of the ACM" in 1968 -// (published in CACM, volume 11, number 10, October 1968, p.657). -// None of those programmers seems to have found it necessary to -// explain the constants or variable names set out by Henry F. Fliegel -// and Thomas C. Van Flandern. Maybe one day I'll buy that jounal and -// expand an explanation here - that day is not today. +// By this point generations of programmers have repeated the algorithm sent to +// the editor of "Communications of the ACM" in 1968 (published in CACM, volume +// 11, number 10, October 1968, p.657). None of those programmers seems to have +// found it necessary to explain the constants or variable names set out by +// Henry F. Fliegel and Thomas C. Van Flandern. Maybe one day I'll buy that +// jounal and expand an explanation here - that day is not today. func doTheFliegelAndVanFlandernAlgorithm(jd int) (day, month, year int) { l := jd + 68569 n := (4 * l) / 146097 @@ -68,24 +87,26 @@ func doTheFliegelAndVanFlandernAlgorithm(jd int) (day, month, year int) { return d, m, y } -// Convert an excelTime representation (stored as a floating point number) to a time.Time. +// timeFromExcelTime provides function to convert an excelTime representation +// (stored as a floating point number) to a time.Time. func timeFromExcelTime(excelTime float64, date1904 bool) time.Time { var date time.Time - var intPart int64 = int64(excelTime) - // Excel uses Julian dates prior to March 1st 1900, and - // Gregorian thereafter. + var intPart = int64(excelTime) + // Excel uses Julian dates prior to March 1st 1900, and Gregorian + // thereafter. if intPart <= 61 { const OFFSET1900 = 15018.0 const OFFSET1904 = 16480.0 + const MJD0 float64 = 2400000.5 var date time.Time if date1904 { - date = julianDateToGregorianTime(MJD_0+OFFSET1904, excelTime) + date = julianDateToGregorianTime(MJD0, excelTime+OFFSET1904) } else { - date = julianDateToGregorianTime(MJD_0+OFFSET1900, excelTime) + date = julianDateToGregorianTime(MJD0, excelTime+OFFSET1900) } return date } - var floatPart float64 = excelTime - float64(intPart) + var floatPart = excelTime - float64(intPart) var dayNanoSeconds float64 = 24 * 60 * 60 * 1000 * 1000 * 1000 if date1904 { date = time.Date(1904, 1, 1, 0, 0, 0, 0, time.UTC) diff --git a/example_test.go b/example_test.go index e62fc5b..20ca64c 100644 --- a/example_test.go +++ b/example_test.go @@ -2,6 +2,7 @@ package xls import ( "fmt" + "testing" ) func ExampleOpen() { @@ -35,3 +36,23 @@ func ExampleWorkBook_GetSheet() { } } } + +func BenchmarkGetSheet(b *testing.B) { + for i := 0; i < b.N; i++ { + if xlFile, err := Open("Table.xls", "utf-8"); err == nil { + for i := 0; i < xlFile.NumSheets(); i++ { + xlFile.GetSheet(i) + } + } + } +} + +func BenchmarkGetSheetWithBuffer(b *testing.B) { + for i := 0; i < b.N; i++ { + if xlFile, err := OpenWithBuffer("Table.xls", "utf-8"); err == nil { + for i := 0; i < xlFile.NumSheets(); i++ { + xlFile.GetSheet(i) + } + } + } +} diff --git a/format.go b/format.go index 35b576c..8df7634 100644 --- a/format.go +++ b/format.go @@ -1,9 +1,228 @@ package xls +import ( + "regexp" + "strconv" + "strings" + "time" +) + +// Excel styles can reference number formats that are built-in, all of which +// have an id less than 164. This is a possibly incomplete list comprised of as +// many of them as I could find. +var builtInNumFmt = map[uint16]string{ + 0: "general", + 1: "0", + 2: "0.00", + 3: "#,##0", + 4: "#,##0.00", + 9: "0%", + 10: "0.00%", + 11: "0.00e+00", + 12: "# ?/?", + 13: "# ??/??", + 14: "mm-dd-yy", + 15: "d-mmm-yy", + 16: "d-mmm", + 17: "mmm-yy", + 18: "h:mm am/pm", + 19: "h:mm:ss am/pm", + 20: "h:mm", + 21: "h:mm:ss", + 22: "m/d/yy h:mm", + 37: "#,##0 ;(#,##0)", + 38: "#,##0 ;[red](#,##0)", + 39: "#,##0.00;(#,##0.00)", + 40: "#,##0.00;[red](#,##0.00)", + 41: `_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)`, + 42: `_("$"* #,##0_);_("$* \(#,##0\);_("$"* "-"_);_(@_)`, + 43: `_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)`, + 44: `_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)`, + 45: "mm:ss", + 46: "[h]:mm:ss", + 47: "mmss.0", + 48: "##0.0e+0", + 49: "@", + 58: time.RFC3339, +} + +// Excel date time mapper to go system +var dateTimeMapper = []struct{ xls, golang string }{ + {"yyyy", "2006"}, + {"yy", "06"}, + {"mmmm", "%%%%"}, + {"dddd", "&&&&"}, + {"dd", "02"}, + {"d", "2"}, + {"mmm", "Jan"}, + {"mmss", "0405"}, + {"ss", "05"}, + {"mm:", "04:"}, + {":mm", ":04"}, + {"mm", "01"}, + {"am/pm", "pm"}, + {"m/", "1/"}, + {"%%%%", "January"}, + {"&&&&", "Monday"}, +} + +// Format value interface type Format struct { Head struct { Index uint16 Size uint16 } - str string + Raw []string + bts int + vType int +} + +// Prepare format meta data +func (f *Format) Prepare() { + var regexColor = regexp.MustCompile("^\\[[a-zA-Z]+\\]") + var regexFraction = regexp.MustCompile("#\\,?#*") + + for k, v := range f.Raw { + // In Excel formats, "_" is used to add spacing, which we can't do in HTML + v = strings.Replace(v, "_", "", -1) + + // Some non-number characters are escaped with \, which we don't need + v = strings.Replace(v, "\\", "", -1) + + // Some non-number strings are quoted, so we'll get rid of the quotes, likewise any positional * symbols + v = strings.Replace(v, "*", "", -1) + v = strings.Replace(v, "\"", "", -1) + + // strip () + v = strings.Replace(v, "(", "", -1) + v = strings.Replace(v, ")", "", -1) + + // strip color information + v = regexColor.ReplaceAllString(v, "") + + // Strip # + v = regexFraction.ReplaceAllString(v, "") + + if 0 == f.vType { + if regexp.MustCompile("^(\\[\\$[A-Z]*-[0-9A-F]*\\])*[hmsdy]").MatchString(v) { + f.vType = TYPE_DATETIME + } else if strings.HasSuffix(v, "%") { + f.vType = TYPE_PERCENTAGE + } else if strings.HasPrefix(v, "$") || strings.HasPrefix(v, "¥") { + f.vType = TYPE_CURRENCY + } + } + + f.Raw[k] = strings.Trim(v, "\r\n\t ") + } + + if 0 == f.vType { + f.vType = TYPE_NUMERIC + } + + if TYPE_NUMERIC == f.vType || TYPE_CURRENCY == f.vType || TYPE_PERCENTAGE == f.vType { + var t []string + if t = strings.SplitN(f.Raw[0], ".", 2); 2 == len(t) { + f.bts = strings.Count(t[1], "") + + if f.bts > 0 { + f.bts = f.bts - 1 + } + } + } +} + +// String format content to spec string +// see http://www.openoffice.org/sc/excelfileformat.pdf Page #174 +func (f *Format) String(v float64) string { + var ret string + + switch f.vType { + case TYPE_NUMERIC: + if 0 == f.bts { + ret = strconv.FormatInt(int64(v), 10) + } else { + ret = strconv.FormatFloat(v, 'f', f.bts, 64) + } + case TYPE_CURRENCY: + if 0 == f.bts { + ret = strconv.FormatInt(int64(v), 10) + } else { + ret = strconv.FormatFloat(v, 'f', f.bts, 64) + } + case TYPE_PERCENTAGE: + if 0 == f.bts { + ret = strconv.FormatInt(int64(v)*100, 10) + "%" + } else { + ret = strconv.FormatFloat(v*100, 'f', f.bts, 64) + "%" + } + case TYPE_DATETIME: + ret = parseTime(v, f.Raw[0]) + default: + ret = strconv.FormatFloat(v, 'f', -1, 64) + } + + return ret +} + +// ByteToUint32 Read 32-bit unsigned integer +func ByteToUint32(b []byte) uint32 { + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +// ByteToUint16 Read 16-bit unsigned integer +func ByteToUint16(b []byte) uint16 { + return (uint16(b[0]) | (uint16(b[1]) << 8)) +} + +// parseTime provides function to returns a string parsed using time.Time. +// Replace Excel placeholders with Go time placeholders. For example, replace +// yyyy with 2006. These are in a specific order, due to the fact that m is used +// in month, minute, and am/pm. It would be easier to fix that with regular +// expressions, but if it's possible to keep this simple it would be easier to +// maintain. Full-length month and days (e.g. March, Tuesday) have letters in +// them that would be replaced by other characters below (such as the 'h' in +// March, or the 'd' in Tuesday) below. First we convert them to arbitrary +// characters unused in Excel Date formats, and then at the end, turn them to +// what they should actually be. +// Based off: http://www.ozgrid.com/Excel/CustomFormats.htm +func parseTime(v float64, f string) string { + var val time.Time + if 0 == v { + val = time.Now() + } else { + val = timeFromExcelTime(v, false) + } + + // It is the presence of the "am/pm" indicator that determines if this is + // a 12 hour or 24 hours time format, not the number of 'h' characters. + if is12HourTime(f) { + f = strings.Replace(f, "hh", "03", 1) + f = strings.Replace(f, "h", "3", 1) + } else { + f = strings.Replace(f, "hh", "15", 1) + f = strings.Replace(f, "h", "15", 1) + } + for _, repl := range dateTimeMapper { + f = strings.Replace(f, repl.xls, repl.golang, 1) + } + + // If the hour is optional, strip it out, along with the possible dangling + // colon that would remain. + if val.Hour() < 1 { + f = strings.Replace(f, "]:", "]", 1) + f = strings.Replace(f, "[03]", "", 1) + f = strings.Replace(f, "[3]", "", 1) + f = strings.Replace(f, "[15]", "", 1) + } else { + f = strings.Replace(f, "[3]", "3", 1) + f = strings.Replace(f, "[15]", "15", 1) + } + + return val.Format(f) +} + +// is12HourTime checks whether an Excel time format string is a 12 hours form. +func is12HourTime(format string) bool { + return strings.Contains(format, "am/pm") || strings.Contains(format, "AM/PM") || strings.Contains(format, "a/p") || strings.Contains(format, "A/P") } diff --git a/row.go b/row.go index 3100394..f5cea22 100644 --- a/row.go +++ b/row.go @@ -2,8 +2,8 @@ package xls type rowInfo struct { Index uint16 - Fcell uint16 - Lcell uint16 + First uint16 + Last uint16 Height uint16 Notused uint16 Notused2 uint16 @@ -20,27 +20,30 @@ type Row struct { //Col Get the Nth Col from the Row, if has not, return nil. //Suggest use Has function to test it. func (r *Row) Col(i int) string { - serial := uint16(i) + var val string + var serial = uint16(i) + if ch, ok := r.cols[serial]; ok { - strs := ch.String(r.wb) - return strs[0] + val = ch.String(r.wb)[0] } else { for _, v := range r.cols { if v.FirstCol() <= serial && v.LastCol() >= serial { - strs := v.String(r.wb) - return strs[serial-v.FirstCol()] + val = v.String(r.wb)[serial-v.FirstCol()] + + break } } } - return "" -} -//LastCol Get the number of Last Col of the Row. -func (r *Row) LastCol() int { - return int(r.info.Lcell) + return val } //FirstCol Get the number of First Col of the Row. func (r *Row) FirstCol() int { - return int(r.info.Fcell) + return int(r.info.First) +} + +//LastCol Get the number of Last Col of the Row. +func (r *Row) LastCol() int { + return int(r.info.Last) } diff --git a/workbook.go b/workbook.go index b0aa076..ec2bba6 100644 --- a/workbook.go +++ b/workbook.go @@ -4,23 +4,23 @@ import ( "bytes" "encoding/binary" "io" - "os" + "strings" "unicode/utf16" ) //xls workbook type type WorkBook struct { - Is5ver bool - Type uint16 - Codepage uint16 - Xfs []st_xf_data - Fonts []Font - Formats map[uint16]*Format - //All the sheets from the workbook + Is5ver bool + Type uint16 + Codepage uint16 + Xfs []XF + Fonts []Font + Formats map[uint16]*Format sheets []*WorkSheet Author string rs io.ReadSeeker sst []string + ref *extSheetRef continue_utf16 uint16 continue_rich uint16 continue_apsb uint32 @@ -29,62 +29,50 @@ type WorkBook struct { //read workbook from ole2 file func newWorkBookFromOle2(rs io.ReadSeeker) *WorkBook { - wb := new(WorkBook) - wb.Formats = make(map[uint16]*Format) - // wb.bts = bts - wb.rs = rs - wb.sheets = make([]*WorkSheet, 0) - wb.Parse(rs) + var wb = &WorkBook{ + rs: rs, + ref: new(extSheetRef), + sheets: make([]*WorkSheet, 0), + Formats: make(map[uint16]*Format), + } + + wb.parse(rs) + wb.prepare() + return wb } -func (w *WorkBook) Parse(buf io.ReadSeeker) { +func (w *WorkBook) parse(buf io.ReadSeeker) { b := new(bof) - bof_pre := new(bof) - // buf := bytes.NewReader(bts) + bp := new(bof) offset := 0 + for { if err := binary.Read(buf, binary.LittleEndian, b); err == nil { - bof_pre, b, offset = w.parseBof(buf, b, bof_pre, offset) + bp, b, offset = w.parseBof(buf, b, bp, offset) } else { break } } } -func (w *WorkBook) addXf(xf st_xf_data) { - w.Xfs = append(w.Xfs, xf) -} - -func (w *WorkBook) addFont(font *FontInfo, buf io.ReadSeeker) { - name, _ := w.get_string(buf, uint16(font.NameB)) - w.Fonts = append(w.Fonts, Font{Info: font, Name: name}) -} - -func (w *WorkBook) addFormat(format *Format) { - if w.Formats == nil { - os.Exit(1) - } - w.Formats[format.Head.Index] = format -} - func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int) (after *bof, after_using *bof, offset int) { after = b after_using = pre var bts = make([]byte, b.Size) binary.Read(buf, binary.LittleEndian, bts) - buf_item := bytes.NewReader(bts) + item := bytes.NewReader(bts) switch b.Id { - case 0x809: + case 0x0809: // BOF bif := new(biffHeader) - binary.Read(buf_item, binary.LittleEndian, bif) + binary.Read(item, binary.LittleEndian, bif) if bif.Ver != 0x600 { wb.Is5ver = true } wb.Type = bif.Type - case 0x042: // CODEPAGE - binary.Read(buf_item, binary.LittleEndian, &wb.Codepage) - case 0x3c: // CONTINUE + case 0x0042: // CODEPAGE + binary.Read(item, binary.LittleEndian, &wb.Codepage) + case 0x3C: // CONTINUE if pre.Id == 0xfc { var size uint16 var err error @@ -92,12 +80,12 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int size = wb.continue_utf16 wb.continue_utf16 = 0 } else { - err = binary.Read(buf_item, binary.LittleEndian, &size) + err = binary.Read(item, binary.LittleEndian, &size) } for err == nil && offset_pre < len(wb.sst) { var str string if size > 0 { - str, err = wb.get_string(buf_item, size) + str, err = wb.parseString(item, size) wb.sst[offset_pre] = wb.sst[offset_pre] + str } @@ -106,23 +94,23 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int } offset_pre++ - err = binary.Read(buf_item, binary.LittleEndian, &size) + err = binary.Read(item, binary.LittleEndian, &size) } } offset = offset_pre after = pre after_using = b - case 0xfc: // SST + case 0x00FC: // SST info := new(SstInfo) - binary.Read(buf_item, binary.LittleEndian, info) + binary.Read(item, binary.LittleEndian, info) wb.sst = make([]string, info.Count) var size uint16 var i = 0 for ; i < int(info.Count); i++ { var err error - if err = binary.Read(buf_item, binary.LittleEndian, &size); err == nil { + if err = binary.Read(item, binary.LittleEndian, &size); err == nil { var str string - str, err = wb.get_string(buf_item, size) + str, err = wb.parseString(item, size) wb.sst[i] = wb.sst[i] + str } @@ -131,41 +119,90 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int } } offset = i - case 0x85: // bOUNDSHEET + case 0x0085: // SHEET var bs = new(boundsheet) - binary.Read(buf_item, binary.LittleEndian, bs) + binary.Read(item, binary.LittleEndian, bs) // different for BIFF5 and BIFF8 - wb.addSheet(bs, buf_item) - case 0x0e0: // XF + wb.addSheet(bs, item) + case 0x0017: // EXTERNSHEET + if !wb.Is5ver { + binary.Read(item, binary.LittleEndian, &wb.ref.Num) + wb.ref.Info = make([]ExtSheetInfo, wb.ref.Num) + binary.Read(item, binary.LittleEndian, &wb.ref.Info) + } + case 0x00e0: // XF if wb.Is5ver { xf := new(Xf5) - binary.Read(buf_item, binary.LittleEndian, xf) + binary.Read(item, binary.LittleEndian, xf) wb.addXf(xf) } else { xf := new(Xf8) - binary.Read(buf_item, binary.LittleEndian, xf) + binary.Read(item, binary.LittleEndian, xf) wb.addXf(xf) } - case 0x031: // FONT + case 0x0031: // FONT f := new(FontInfo) - binary.Read(buf_item, binary.LittleEndian, f) - wb.addFont(f, buf_item) - case 0x41E: //FORMAT - font := new(Format) - binary.Read(buf_item, binary.LittleEndian, &font.Head) - font.str, _ = wb.get_string(buf_item, font.Head.Size) - wb.addFormat(font) - case 0x22: //DATEMODE - binary.Read(buf_item, binary.LittleEndian, &wb.dateMode) + binary.Read(item, binary.LittleEndian, f) + wb.addFont(f, item) + case 0x041E: //FORMAT + format := new(Format) + binary.Read(item, binary.LittleEndian, &format.Head) + if raw, err := wb.parseString(item, format.Head.Size); nil == err && "" != raw { + format.Raw = strings.Split(raw, ";") + } else { + format.Raw = []string{} + } + + wb.addFormat(format) + case 0x0022: //DATEMODE + binary.Read(item, binary.LittleEndian, &wb.dateMode) } return } -func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err error) { +func (w *WorkBook) addXf(xf XF) { + w.Xfs = append(w.Xfs, xf) +} + +func (w *WorkBook) addFont(font *FontInfo, buf io.ReadSeeker) { + name, _ := w.parseString(buf, uint16(font.NameB)) + w.Fonts = append(w.Fonts, Font{Info: font, Name: name}) +} + +func (w *WorkBook) addFormat(format *Format) { + w.Formats[format.Head.Index] = format +} + +func (w *WorkBook) addSheet(sheet *boundsheet, buf io.ReadSeeker) { + name, _ := w.parseString(buf, uint16(sheet.Name)) + w.sheets = append(w.sheets, &WorkSheet{id: len(w.sheets), bs: sheet, Name: name, wb: w}) +} + +// prepare process workbook struct +func (w *WorkBook) prepare() { + for k, v := range builtInNumFmt { + if _, ok := w.Formats[k]; !ok { + w.Formats[k] = &Format{ + Raw: strings.Split(v, ";"), + } + } + } + for _, v := range w.Formats { + v.Prepare() + } +} + +//reading a sheet from the compress file to memory, you should call this before you try to get anything from sheet +func (w *WorkBook) prepareSheet(sheet *WorkSheet) { + w.rs.Seek(int64(sheet.bs.Filepos), 0) + sheet.parse(w.rs) +} + +func (w *WorkBook) parseString(buf io.ReadSeeker, size uint16) (res string, err error) { if w.Is5ver { var bts = make([]byte, size) _, err = buf.Read(bts) - res = string(bts) + res = string(bytes.Trim(bts, "\r\n\t ")) } else { var richtext_num = uint16(0) var phonetic_size = uint32(0) @@ -190,7 +227,7 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e err = binary.Read(buf, binary.LittleEndian, &bts[i]) } runes := utf16.Decode(bts[:i]) - res = string(runes) + res = strings.Trim(string(runes), "\r\n\t ") if i < size { w.continue_utf16 = size - i + 1 } @@ -208,23 +245,21 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e bts1[k] = uint16(v) } runes := utf16.Decode(bts1) - res = string(runes) + res = strings.Trim(string(runes), "\r\n\t ") } if richtext_num > 0 { var bts []byte - var seek_size int64 + var ss int64 if w.Is5ver { - seek_size = int64(2 * richtext_num) + ss = int64(2 * richtext_num) } else { - seek_size = int64(4 * richtext_num) + ss = int64(4 * richtext_num) } - bts = make([]byte, seek_size) + bts = make([]byte, ss) err = binary.Read(buf, binary.LittleEndian, bts) if err == io.EOF { w.continue_rich = richtext_num } - - // err = binary.Read(buf, binary.LittleEndian, bts) } if phonetic_size > 0 { var bts []byte @@ -238,18 +273,20 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e return } -func (w *WorkBook) addSheet(sheet *boundsheet, buf io.ReadSeeker) { - name, _ := w.get_string(buf, uint16(sheet.Name)) - w.sheets = append(w.sheets, &WorkSheet{bs: sheet, Name: name, wb: w}) +// Format format value to string +func (w *WorkBook) Format(xf uint16, v float64) (string, bool) { + var val string + var idx = int(xf) + if len(w.Xfs) > idx { + if formatter := w.Formats[w.Xfs[idx].FormatNo()]; nil != formatter { + return formatter.String(v), true + } + } + + return val, false } -//reading a sheet from the compress file to memory, you should call this before you try to get anything from sheet -func (w *WorkBook) prepareSheet(sheet *WorkSheet) { - w.rs.Seek(int64(sheet.bs.Filepos), 0) - sheet.parse(w.rs) -} - -//Get one sheet by its number +//GetSheet get one sheet by its number func (w *WorkBook) GetSheet(num int) *WorkSheet { if num < len(w.sheets) { s := w.sheets[num] @@ -257,9 +294,8 @@ func (w *WorkBook) GetSheet(num int) *WorkSheet { w.prepareSheet(s) } return s - } else { - return nil } + return nil } //Get the number of all sheets, look into example @@ -267,9 +303,9 @@ func (w *WorkBook) NumSheets() int { return len(w.sheets) } -//helper function to read all cells from file +//ReadAllCells helper function to read all cells from file //Notice: the max value is the limit of the max capacity of lines. -//Warning: the helper function will need big memeory if file is large. +//Warning: the helper function will need big memory if file is large. func (w *WorkBook) ReadAllCells(max int) (res [][]string) { res = make([][]string, 0) for _, sheet := range w.sheets { @@ -277,11 +313,11 @@ func (w *WorkBook) ReadAllCells(max int) (res [][]string) { max = max - len(res) w.prepareSheet(sheet) if sheet.MaxRow != 0 { - leng := int(sheet.MaxRow) + 1 - if max < leng { - leng = max + length := int(sheet.MaxRow) + 1 + if max < length { + length = max } - temp := make([][]string, leng) + temp := make([][]string, length) for k, row := range sheet.rows { data := make([]string, 0) if len(row.cols) > 0 { @@ -295,7 +331,7 @@ func (w *WorkBook) ReadAllCells(max int) (res [][]string) { data[col.FirstCol()+i] = str[i] } } - if leng > int(k) { + if length > int(k) { temp[k] = data } } diff --git a/worksheet.go b/worksheet.go index 9bf065c..e7deb12 100644 --- a/worksheet.go +++ b/worksheet.go @@ -2,7 +2,6 @@ package xls import ( "encoding/binary" - "fmt" "io" "unicode/utf16" ) @@ -14,6 +13,18 @@ type boundsheet struct { Name byte } +type extSheetRef struct { + Num uint16 + Info []ExtSheetInfo +} + +// ExtSheetInfo external sheet references provided for named cells +type ExtSheetInfo struct { + ExternalBookIndex uint16 + FirstSheetIndex uint16 + LastSheetIndex uint16 +} + //WorkSheet in one WorkBook type WorkSheet struct { bs *boundsheet @@ -22,6 +33,7 @@ type WorkSheet struct { rows map[uint16]*Row //NOTICE: this is the max row number of the sheet, so it should be count -1 MaxRow uint16 + id int parsed bool } @@ -36,15 +48,14 @@ func (w *WorkSheet) Row(i int) *Row { func (w *WorkSheet) parse(buf io.ReadSeeker) { w.rows = make(map[uint16]*Row) b := new(bof) - var bof_pre *bof + var bp *bof for { if err := binary.Read(buf, binary.LittleEndian, b); err == nil { - bof_pre = w.parseBof(buf, b, bof_pre) + bp = w.parseBof(buf, b, bp) if b.Id == 0xa { break } } else { - fmt.Println(err) break } } @@ -63,21 +74,17 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, b *bof, pre *bof) *bof { case 0x0BD: //MULRK mc := new(MulrkCol) size := (b.Size - 6) / 6 - binary.Read(buf, binary.LittleEndian, &mc.Col) mc.Xfrks = make([]XfRk, size) - for i := uint16(0); i < size; i++ { - binary.Read(buf, binary.LittleEndian, &mc.Xfrks[i]) - } + binary.Read(buf, binary.LittleEndian, &mc.Col) + binary.Read(buf, binary.LittleEndian, &mc.Xfrks) binary.Read(buf, binary.LittleEndian, &mc.LastColB) col = mc case 0x0BE: //MULBLANK mc := new(MulBlankCol) size := (b.Size - 6) / 2 - binary.Read(buf, binary.LittleEndian, &mc.Col) mc.Xfs = make([]uint16, size) - for i := uint16(0); i < size; i++ { - binary.Read(buf, binary.LittleEndian, &mc.Xfs[i]) - } + binary.Read(buf, binary.LittleEndian, &mc.Col) + binary.Read(buf, binary.LittleEndian, &mc.Xfs) binary.Read(buf, binary.LittleEndian, &mc.LastColB) col = mc case 0x203: //NUMBER @@ -85,10 +92,26 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, b *bof, pre *bof) *bof { binary.Read(buf, binary.LittleEndian, col) case 0x06: //FORMULA c := new(FormulaCol) - binary.Read(buf, binary.LittleEndian, &c.Header) + c.ws = w.id + c.Header = new(FormulaColHeader) c.Bts = make([]byte, b.Size-20) + binary.Read(buf, binary.LittleEndian, c.Header) binary.Read(buf, binary.LittleEndian, &c.Bts) col = c + c.parse(w.wb, false) + + if TYPE_STRING == c.vType { + binary.Read(buf, binary.LittleEndian, &c.Code) + binary.Read(buf, binary.LittleEndian, &c.Btl) + binary.Read(buf, binary.LittleEndian, &c.Btc) + + var fms, fme = w.wb.parseString(buf, c.Btc) + if nil == fme { + c.value = fms + } + + buf.Seek(-int64(c.Btl+4), 1) + } case 0x27e: //RK col = new(RkCol) binary.Read(buf, binary.LittleEndian, col) @@ -100,18 +123,18 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, b *bof, pre *bof) *bof { binary.Read(buf, binary.LittleEndian, &c.BlankCol) var count uint16 binary.Read(buf, binary.LittleEndian, &count) - c.Str, _ = w.wb.get_string(buf, count) + c.Str, _ = w.wb.parseString(buf, count) col = c case 0x201: //BLANK col = new(BlankCol) binary.Read(buf, binary.LittleEndian, col) case 0x1b8: //HYPERLINK + var flag uint32 + var count uint32 var hy HyperLink binary.Read(buf, binary.LittleEndian, &hy.CellRange) buf.Seek(20, 1) - var flag uint32 binary.Read(buf, binary.LittleEndian, &flag) - var count uint32 if flag&0x14 != 0 { binary.Read(buf, binary.LittleEndian, &count) @@ -172,7 +195,6 @@ func (w *WorkSheet) add(content interface{}) { w.addCell(col, ch) } } - } func (w *WorkSheet) addCell(col Coler, ch contentHandler) { @@ -180,33 +202,36 @@ func (w *WorkSheet) addCell(col Coler, ch contentHandler) { } func (w *WorkSheet) addRange(rang Ranger, ch contentHandler) { - for i := rang.FirstRow(); i <= rang.LastRow(); i++ { w.addContent(i, ch) } } -func (w *WorkSheet) addContent(row_num uint16, ch contentHandler) { +func (w *WorkSheet) addContent(num uint16, ch contentHandler) { var row *Row var ok bool - if row, ok = w.rows[row_num]; !ok { + if row, ok = w.rows[num]; !ok { info := new(rowInfo) - info.Index = row_num + info.Index = num row = w.addRow(info) } row.cols[ch.FirstCol()] = ch } -func (w *WorkSheet) addRow(info *rowInfo) (row *Row) { +func (w *WorkSheet) addRow(info *rowInfo) *Row { + var ok bool + var row *Row + if info.Index > w.MaxRow { w.MaxRow = info.Index } - var ok bool + if row, ok = w.rows[info.Index]; ok { row.info = info } else { - row = &Row{info: info, cols: make(map[uint16]contentHandler)} + row = &Row{info: info, cols: make(map[uint16]contentHandler, int(info.Last-info.First))} w.rows[info.Index] = row } - return + + return row } diff --git a/xf.go b/xf.go index 8f4dd1e..240f964 100644 --- a/xf.go +++ b/xf.go @@ -1,5 +1,9 @@ package xls +type XF interface { + FormatNo() uint16 +} + type Xf5 struct { Font uint16 Format uint16 @@ -11,7 +15,7 @@ type Xf5 struct { Linestyle uint16 } -func (x *Xf5) formatNo() uint16 { +func (x *Xf5) FormatNo() uint16 { return x.Format } @@ -28,10 +32,6 @@ type Xf8 struct { Groundcolor uint16 } -func (x *Xf8) formatNo() uint16 { +func (x *Xf8) FormatNo() uint16 { return x.Format } - -type st_xf_data interface { - formatNo() uint16 -} diff --git a/xls.go b/xls.go index 7979cb9..02ed128 100644 --- a/xls.go +++ b/xls.go @@ -1,7 +1,9 @@ package xls import ( + "bytes" "io" + "io/ioutil" "os" "github.com/extrame/ole2" @@ -16,7 +18,16 @@ func Open(file string, charset string) (*WorkBook, error) { } } -//Open one xls file and return the closer +//OpenWithBuffer open one xls file with memory buffer +func OpenWithBuffer(file string, charset string) (*WorkBook, error) { + if fi, err := ioutil.ReadFile(file); err == nil { + return OpenReader(bytes.NewReader(fi), charset) + } else { + return nil, err + } +} + +//OpenWithCloser open one xls file and return the closer func OpenWithCloser(file string, charset string) (*WorkBook, io.Closer, error) { if fi, err := os.Open(file); err == nil { wb, err := OpenReader(fi, charset) @@ -26,7 +37,7 @@ func OpenWithCloser(file string, charset string) (*WorkBook, io.Closer, error) { } } -//Open xls file from reader +//OpenReader open xls file from reader func OpenReader(reader io.ReadSeeker, charset string) (wb *WorkBook, err error) { var ole *ole2.Ole if ole, err = ole2.Open(reader, charset); err == nil {