21
0
Fork 0

Merge pull request #50 from sergeilem/master

FIX: data corruption while reading, issues #31 #46 #47
This commit is contained in:
Lucas Liu 2020-04-26 20:46:01 +08:00 committed by GitHub
commit 4a6cf26307
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 198 additions and 83 deletions

23
.travis.yml Normal file
View File

@ -0,0 +1,23 @@
language: go
# Force-enable Go modules. This is required on Go versions before 1.16, where module mode is not yet the default.
env:
- GO111MODULE=on
go:
- 1.11.x
# Only clone the most recent commit.
git:
depth: 1
# Skip the install step. Don't `go get` dependencies. Only build with the code
# in vendor/
install: true
# Don't email me the results of the test runs.
notifications:
email: false
script:
- go test -v -race ./... # Run all the tests with the race detector enabled

BIN
BigTable.xls Normal file

Binary file not shown.

View File

@ -2,17 +2,9 @@
[![GoDoc](https://godoc.org/github.com/extrame/xls?status.svg)](https://godoc.org/github.com/extrame/xls)
Pure Golang xls library written by [Rongshu Tech (Chinese)](http://www.rongshu.tech).
Pure Golang xls library written by [Rongshu Tech (Chinese)](http://www.rongshu.tech), based on libxls.
Thanks for contributions from Tamás Gulácsi, sergeilem.
**English User please mailto** [Liu Ming](mailto:liuming@rongshu.tech)
This is an xls library written in pure Golang. It is mostly a translation of the libxls library, which is written in C.
The master branch has only the reading function, without formatting.
*The **new_formater** branch provides better formatting for dates and numbers, but it is still under test; you can try it in a development environment. If you have a problem with the output format, tell me about it and I will try to fix it.*
Thanks for contributions from Tamás Gulácsi @tgulacsi, @flyin9.
# Basic Usage
@ -20,5 +12,4 @@ The master brunch has just the reading function without the format.
* Use the **OpenWithCloser** function to open a file, and use the returned closer to close it
* Use the **OpenReader** function to open an xls from a reader; you should close the related file in your own code
* Follow the example in GODOC
* Follow the example in GoDoc

59
bigtable_test.go Normal file
View File

@ -0,0 +1,59 @@
package xls
import (
"fmt"
"testing"
"time"
)
// TestBigTable walks rows 1 through 4999 of the first sheet of BigTable.xls
// and checks that columns 2, 5 and 8 each hold the text "<counter> от <date>".
// The counters start at 1, 10000 and 20000, and the dates at 2015-01-01,
// 2016-01-01 and 2017-01-01. Counter and date advance together by one for
// every row that is checked. Rows the sheet does not return are skipped
// without advancing the expected values.
func TestBigTable(t *testing.T) {
	const (
		isoLayout  = "2006-01-02"
		cellLayout = "02.01.2006"
		lastRow    = 4999
	)

	book, err := Open("BigTable.xls", "utf-8")
	if err != nil {
		t.Fatalf("Cant open xls file: %s", err)
	}

	sheet := book.GetSheet(0)
	if sheet == nil {
		t.Fatal("Cant get sheet")
	}

	// Parse errors are ignored here: the inputs are fixed, well-formed literals.
	start2, _ := time.Parse(isoLayout, "2015-01-01")
	start5, _ := time.Parse(isoLayout, "2016-01-01")
	start8, _ := time.Parse(isoLayout, "2017-01-01")

	// checked is the number of rows verified so far; it is both the counter
	// offset and the number of days to add to each start date.
	checked := 0
	for rowNo := 1; rowNo <= lastRow; rowNo++ {
		row := sheet.Row(rowNo)
		if row == nil {
			continue
		}

		want2 := fmt.Sprintf("%d от %s", 1+checked, start2.AddDate(0, 0, checked).Format(cellLayout))
		want5 := fmt.Sprintf("%d от %s", 10000+checked, start5.AddDate(0, 0, checked).Format(cellLayout))
		want8 := fmt.Sprintf("%d от %s", 20000+checked, start8.AddDate(0, 0, checked).Format(cellLayout))

		got2 := row.Col(2)
		got5 := row.Col(5)
		got8 := row.Col(8)

		switch {
		case got2 != want2:
			t.Fatalf("Row %d: col 2 val not eq base value: %s != %s", rowNo, got2, want2)
		case got5 != want5:
			t.Fatalf("Row %d: col 5 val not eq base value: %s != %s", rowNo, got5, want5)
		case got8 != want8:
			t.Fatalf("Row %d: col 8 val not eq base value: %s != %s", rowNo, got8, want8)
		}

		checked++
	}
}

15
col.go
View File

@ -54,7 +54,16 @@ func (xf *XfRk) String(wb *WorkBook) string {
fNo := wb.Xfs[idx].formatNo()
if fNo >= 164 { // user defined format
if formatter := wb.Formats[fNo]; formatter != nil {
if strings.Contains(formatter.str, "#") || strings.Contains(formatter.str, ".00") {
formatterLower := strings.ToLower(formatter.str)
if formatterLower == "general" ||
strings.Contains(formatter.str, "#") ||
strings.Contains(formatter.str, ".00") ||
strings.Contains(formatterLower, "m/y") ||
strings.Contains(formatterLower, "d/y") ||
strings.Contains(formatterLower, "m.y") ||
strings.Contains(formatterLower, "d.y") ||
strings.Contains(formatterLower, "h:") ||
strings.Contains(formatterLower, "д.г") {
//If format contains # or .00 then this is a number
return xf.Rk.String()
} else {
@ -84,7 +93,7 @@ type RK uint32
func (rk RK) number() (intNum int64, floatNum float64, isFloat bool) {
multiplied := rk & 1
isInt := rk & 2
val := rk >> 2
val := int32(rk) >> 2
if isInt == 0 {
isFloat = true
floatNum = math.Float64frombits(uint64(val) << 34)
@ -93,13 +102,11 @@ func (rk RK) number() (intNum int64, floatNum float64, isFloat bool) {
}
return
}
//+++ add lines from here
if multiplied != 0 {
isFloat = true
floatNum = float64(val) / 100
return
}
//+++end
return int64(val), 0, false
}

55
comparexlsxlsx.go Normal file
View File

@ -0,0 +1,55 @@
package xls
import (
"fmt"
"github.com/tealeg/xlsx"
"math"
"strconv"
)
// CompareXlsXlsx compares the cell texts of an xls file with those of an xlsx
// file, using the xlsx file as the reference for which sheets, rows and cells
// to visit.
//
// Two cells are considered equal when their texts are identical, or when both
// texts parse as floating-point numbers that differ by no more than 1e-7.
//
// It returns an empty string when every visited cell matches. Otherwise it
// returns a description of the first problem found: a file that cannot be
// opened, a sheet missing from the xls file, or the first mismatching cell.
func CompareXlsXlsx(xlsfilepathname string, xlsxfilepathname string) string {
	// Largest absolute difference at which two numeric cells still count as equal.
	const tolerance = 0.0000001

	xlsFile, err := Open(xlsfilepathname, "utf-8")
	if err != nil {
		return fmt.Sprintf("Cant open xls file: %s", err)
	}
	xlsxFile, err := xlsx.OpenFile(xlsxfilepathname)
	if err != nil {
		return fmt.Sprintf("Cant open xlsx file: %s", err)
	}

	for sheet, xlsxSheet := range xlsxFile.Sheets {
		xlsSheet := xlsFile.GetSheet(sheet)
		if xlsSheet == nil {
			return "Cant get xls sheet"
		}
		for row, xlsxRow := range xlsxSheet.Rows {
			// NOTE(review): Row appears to return nil for a row the xls sheet
			// does not hold; such a row is treated as consisting of empty
			// cells instead of being dereferenced.
			xlsRow := xlsSheet.Row(row)
			for cell, xlsxCell := range xlsxRow.Cells {
				xlsxText := xlsxCell.String()
				xlsText := ""
				if xlsRow != nil {
					xlsText = xlsRow.Col(cell)
				}
				if xlsText == xlsxText {
					continue
				}

				// The texts differ; they may still be the same number
				// rendered differently, so try a numeric comparison.
				xlsFloat, xlsErr := strconv.ParseFloat(xlsText, 64)
				xlsxFloat, xlsxErr := strconv.ParseFloat(xlsxText, 64)
				if xlsErr != nil || xlsxErr != nil {
					return fmt.Sprintf("sheet:%d, row/col: %d/%d, xlsx: (%s)[%d], xls: (%s)[%d].",
						sheet, row, cell, xlsxText, len(xlsxText),
						xlsText, len(xlsText))
				}
				if diff := math.Abs(xlsFloat - xlsxFloat); diff > tolerance {
					return fmt.Sprintf("sheet:%d, row/col: %d/%d, xlsx: (%s)[%d], xls: (%s)[%d], numbers difference: %f.",
						sheet, row, cell, xlsxText, len(xlsxText),
						xlsText, len(xlsText), diff)
				}
			}
		}
	}
	return ""
}

30
issue47_test.go Normal file
View File

@ -0,0 +1,30 @@
package xls
import (
"io/ioutil"
"path"
"path/filepath"
"strings"
"testing"
)
// TestIssue47 compares every .xls file in the testdata directory with the
// .xlsx file of the same base name, and fails on the first difference that
// CompareXlsXlsx reports.
func TestIssue47(t *testing.T) {
	const dir = "testdata"

	entries, err := ioutil.ReadDir(dir)
	if err != nil {
		t.Fatalf("Cant read testdata directory contents: %s", err)
	}

	for _, entry := range entries {
		xlsName := entry.Name()
		ext := filepath.Ext(xlsName)
		if ext != ".xls" {
			continue
		}

		// The reference file has the same base name with an .xlsx extension.
		xlsxName := strings.TrimSuffix(xlsName, ext) + ".xlsx"
		report := CompareXlsXlsx(path.Join(dir, xlsName), path.Join(dir, xlsxName))
		if report != "" {
			t.Fatalf("XLS file %s an XLSX file are not equal: %s", xlsName, report)
		}
	}
}

BIN
testdata/bigtable.xls vendored Normal file

Binary file not shown.

BIN
testdata/bigtable.xlsx vendored Normal file

Binary file not shown.

BIN
testdata/float.xls vendored Normal file

Binary file not shown.

BIN
testdata/float.xlsx vendored Normal file

Binary file not shown.

BIN
testdata/issue47.xls vendored Normal file

Binary file not shown.

BIN
testdata/issue47.xlsx vendored Normal file

Binary file not shown.

BIN
testdata/negatives.xls vendored Normal file

Binary file not shown.

BIN
testdata/negatives.xlsx vendored Normal file

Binary file not shown.

BIN
testdata/superstore.xls vendored Normal file

Binary file not shown.

BIN
testdata/superstore.xlsx vendored Normal file

Binary file not shown.

BIN
testdata/times.xls vendored Normal file

Binary file not shown.

BIN
testdata/times.xlsx vendored Normal file

Binary file not shown.

View File

@ -97,10 +97,8 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int
}
for err == nil && offset_pre < len(wb.sst) {
var str string
if size > 0 {
str, err = wb.get_string(buf_item, size)
wb.sst[offset_pre] = wb.sst[offset_pre] + str
}
str, err = wb.get_string(buf_item, size)
wb.sst[offset_pre] = wb.sst[offset_pre] + str
if err == io.EOF {
break
@ -119,9 +117,12 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int
wb.sst = make([]string, info.Count)
var size uint16
var i = 0
// dont forget to initialize offset
offset = 0
for ; i < int(info.Count); i++ {
var err error
if err = binary.Read(buf_item, binary.LittleEndian, &size); err == nil {
err = binary.Read(buf_item, binary.LittleEndian, &size)
if err == nil {
var str string
str, err = wb.get_string(buf_item, size)
wb.sst[i] = wb.sst[i] + str
@ -132,7 +133,7 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int
}
}
offset = i
case 0x85: // bOUNDSHEET
case 0x85: // boundsheet
var bs = new(boundsheet)
binary.Read(buf_item, binary.LittleEndian, bs)
// different for BIFF5 and BIFF8
@ -195,11 +196,19 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e
for ; i < size && err == nil; i++ {
err = binary.Read(buf, binary.LittleEndian, &bts[i])
}
runes := utf16.Decode(bts[:i])
// when eof found, we dont want to append last element
var runes []rune
if err == io.EOF {
i = i - 1
}
runes = utf16.Decode(bts[:i])
res = string(runes)
if i < size {
w.continue_utf16 = size - i + 1
w.continue_utf16 = size - i
}
} else {
var bts = make([]byte, size)
var n int

View File

@ -3,7 +3,6 @@ package xls
import (
"fmt"
"testing"
"unicode/utf16"
)
func TestOpen(t *testing.T) {
@ -27,61 +26,3 @@ func TestOpen(t *testing.T) {
}
}
}
// TestEuropeString widens four single-byte values to 16-bit code units,
// decodes them as UTF-16 and prints the resulting string to standard output.
// It makes no assertions.
func TestEuropeString(t *testing.T) {
	raw := []byte{66, 233, 114, 232}

	units := make([]uint16, 0, 4)
	for _, b := range raw {
		units = append(units, uint16(b))
	}

	fmt.Println(string(utf16.Decode(units)))
}
// func TestOpen1(t *testing.T) {
// xlFile, _ := Open("000.xls", "")
// for i := 0; i < xlFile.NumSheets(); i++ {
// fmt.Println(xlFile.GetSheet(i).Name)
// sheet := xlFile.GetSheet(i)
// row := sheet.Row(1]
// for i, col := range row.Cols {
// fmt.Println(i, col.String(xlFile))
// }
// }
// // sheet1 := xlFile.GetSheet(0)
// // fmt.Println(sheet1.Name)
// // fmt.Print(sheet1.Row()
// // for k, row1 := range sheet1.Row({
// // // row1 := sheet1.Row(1]
// // fmt.Printf("\n[%d]", k)
// // for _, col1 := range row1.Cols {
// // // col1 := row1.Cols[0]
// // fmt.Print(col1.LastCol())
// // fmt.Print(" ")
// // }
// // }
// }
// func TestBof(t *testing.T) {
// b := new(bof)
// b.Id = 0x41E
// b.Size = 55
// buf := bytes.NewReader([]byte{0x07, 0x00, 0x19, 0x00, 0x01, 0x22, 0x00, 0xE5, 0xFF, 0x22, 0x00, 0x23, 0x00, 0x2C, 0x00, 0x23, 0x00, 0x23, 0x00, 0x30, 0x00, 0x2E, 0x00, 0x30, 0x00, 0x30, 0x00, 0x3B, 0x00, 0x22, 0x00, 0xE5, 0xFF, 0x22, 0x00, 0x5C, 0x00, 0x2D, 0x00, 0x23, 0x00, 0x2C, 0x20, 0x00})
// wb := new(WorkBook)
// wb.Formats = make(map[uint16]*Format)
// wb.parseBof(buf, b, b, 0)
// }
// func TestMaxRow(t *testing.T) {
// xlFile, err := Open("Table.xls", "utf-8")
// if err != nil {
// fmt.Fprintf(os.Stderr, "Failure: %v\n", err)
// t.Error(err)
// }
// if sheet1 := xlFile.GetSheet(0); sheet1 != nil {
// if sheet1.MaxRow != 11 {
// t.Errorf("max row is error,is %d instead of 11", sheet1.MaxRow)
// }
// }
// }