20
0

Merge pull request #50 from sergeilem/master

FIX: data corruption while reading, issues #31 #46 #47
This commit is contained in:
Lucas Liu 2020-04-26 20:46:01 +08:00 committed by GitHub
commit 4a6cf26307
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 198 additions and 83 deletions

23
.travis.yml Normal file
View File

@ -0,0 +1,23 @@
language: go
# Force-enable Go modules. This will be unnecessary when Go 1.12 lands.
env:
- GO111MODULE=on
go:
- 1.11.x
# Only clone the most recent commit.
git:
depth: 1
# Skip the install step. Don't `go get` dependencies. Only build with the code
# in vendor/
install: true
# Don't email me the results of the test runs.
notifications:
email: false
script:
- go test -v -race ./... # Run all the tests with the race detector enabled

BIN
BigTable.xls Normal file

Binary file not shown.

View File

@ -2,17 +2,9 @@
[![GoDoc](https://godoc.org/github.com/extrame/xls?status.svg)](https://godoc.org/github.com/extrame/xls) [![GoDoc](https://godoc.org/github.com/extrame/xls?status.svg)](https://godoc.org/github.com/extrame/xls)
Pure Golang xls library written by [Rongshu Tech(chinese)](http://www.rongshu.tech). Pure Golang xls library written by [Rongshu Tech (chinese)](http://www.rongshu.tech), based on libxls.
Thanks for contributions from Tamás Gulácsi, sergeilem. Thanks for contributions from Tamás Gulácsi @tgulacsi, @flyin9.
**English User please mailto** [Liu Ming](mailto:liuming@rongshu.tech)
This is a xls library writen in pure Golang. Almostly it is translated from the libxls library in c.
The master brunch has just the reading function without the format.
***new_formater** branch is for better format for date and number ,but just under test, you can try it in development environment. If you have some problem about the output format, tell me the problem, I will try to fix it.*
# Basic Usage # Basic Usage
@ -20,5 +12,4 @@ The master brunch has just the reading function without the format.
* Use **OpenWithCloser** function for open file and use the return value closer for close file * Use **OpenWithCloser** function for open file and use the return value closer for close file
* Use **OpenReader** function for open xls from a reader, you should close related file in your own code * Use **OpenReader** function for open xls from a reader, you should close related file in your own code
* Follow the example in GODOC * Follow the example in GoDoc

59
bigtable_test.go Normal file
View File

@ -0,0 +1,59 @@
package xls
import (
"fmt"
"testing"
"time"
)
// TestBigTable opens BigTable.xls and walks rows 1 through 4999 of its first
// sheet, comparing columns 2, 5 and 8 against generated "<number> от <date>"
// strings. The numbers start at 1, 10000 and 20000 and the dates at
// 2015-01-01, 2016-01-01 and 2017-01-01; each advances by one for every row
// that is present. Rows for which sheet.Row returns nil are skipped without
// advancing the expected values.
func TestBigTable(t *testing.T) {
	book, err := Open("BigTable.xls", "utf-8")
	if err != nil {
		t.Fatalf("Cant open xls file: %s", err)
	}
	sheet := book.GetSheet(0)
	if sheet == nil {
		t.Fatal("Cant get sheet")
	}

	// Parse errors are ignored: the inputs are fixed, well-formed literals.
	parseDay := func(s string) time.Time {
		day, _ := time.Parse("2006-01-02", s)
		return day
	}
	base2 := parseDay("2015-01-01")
	base5 := parseDay("2016-01-01")
	base8 := parseDay("2017-01-01")

	// expected builds the reference cell text for a column whose number
	// starts at start and whose date starts at base, after seen rows.
	expected := func(start int, base time.Time, seen int) string {
		return fmt.Sprintf("%d от %s", start+seen, base.AddDate(0, 0, seen).Format("02.01.2006"))
	}

	// seen counts the rows actually compared; it, not the row index, drives
	// the expected values so that skipped rows do not shift the sequence.
	seen := 0
	for i := 1; i <= 4999; i++ {
		row := sheet.Row(i)
		if row == nil {
			continue
		}

		want2 := expected(1, base2, seen)
		want5 := expected(10000, base5, seen)
		want8 := expected(20000, base8, seen)

		got2, got5, got8 := row.Col(2), row.Col(5), row.Col(8)

		// Fatalf stops the test, so only the first mismatch is reported.
		switch {
		case got2 != want2:
			t.Fatalf("Row %d: col 2 val not eq base value: %s != %s", i, got2, want2)
		case got5 != want5:
			t.Fatalf("Row %d: col 5 val not eq base value: %s != %s", i, got5, want5)
		case got8 != want8:
			t.Fatalf("Row %d: col 8 val not eq base value: %s != %s", i, got8, want8)
		}

		seen++
	}
}

15
col.go
View File

@ -54,7 +54,16 @@ func (xf *XfRk) String(wb *WorkBook) string {
fNo := wb.Xfs[idx].formatNo() fNo := wb.Xfs[idx].formatNo()
if fNo >= 164 { // user defined format if fNo >= 164 { // user defined format
if formatter := wb.Formats[fNo]; formatter != nil { if formatter := wb.Formats[fNo]; formatter != nil {
if strings.Contains(formatter.str, "#") || strings.Contains(formatter.str, ".00") { formatterLower := strings.ToLower(formatter.str)
if formatterLower == "general" ||
strings.Contains(formatter.str, "#") ||
strings.Contains(formatter.str, ".00") ||
strings.Contains(formatterLower, "m/y") ||
strings.Contains(formatterLower, "d/y") ||
strings.Contains(formatterLower, "m.y") ||
strings.Contains(formatterLower, "d.y") ||
strings.Contains(formatterLower, "h:") ||
strings.Contains(formatterLower, "д.г") {
//If format contains # or .00 then this is a number //If format contains # or .00 then this is a number
return xf.Rk.String() return xf.Rk.String()
} else { } else {
@ -84,7 +93,7 @@ type RK uint32
func (rk RK) number() (intNum int64, floatNum float64, isFloat bool) { func (rk RK) number() (intNum int64, floatNum float64, isFloat bool) {
multiplied := rk & 1 multiplied := rk & 1
isInt := rk & 2 isInt := rk & 2
val := rk >> 2 val := int32(rk) >> 2
if isInt == 0 { if isInt == 0 {
isFloat = true isFloat = true
floatNum = math.Float64frombits(uint64(val) << 34) floatNum = math.Float64frombits(uint64(val) << 34)
@ -93,13 +102,11 @@ func (rk RK) number() (intNum int64, floatNum float64, isFloat bool) {
} }
return return
} }
//+++ add lines from here
if multiplied != 0 { if multiplied != 0 {
isFloat = true isFloat = true
floatNum = float64(val) / 100 floatNum = float64(val) / 100
return return
} }
//+++end
return int64(val), 0, false return int64(val), 0, false
} }

55
comparexlsxlsx.go Normal file
View File

@ -0,0 +1,55 @@
package xls
import (
"fmt"
"github.com/tealeg/xlsx"
"math"
"strconv"
)
// CompareXlsXlsx compares the xls file at xlsfilepathname with the xlsx file
// at xlsxfilepathname cell by cell. The xlsx file drives the iteration: every
// sheet, row and cell it contains is looked up at the same index in the xls
// file.
//
// Two cells are considered equal when their text is identical, or when both
// texts parse as floating-point numbers that differ by no more than 1e-7.
//
// It returns an empty string when all visited cells are equal, and otherwise
// a human-readable description of the first problem encountered (a file that
// cannot be opened, a missing xls sheet, or the first differing cell).
func CompareXlsXlsx(xlsfilepathname string, xlsxfilepathname string) string {
	xlsFile, err := Open(xlsfilepathname, "utf-8")
	if err != nil {
		return fmt.Sprintf("Cant open xls file: %s", err)
	}
	xlsxFile, err := xlsx.OpenFile(xlsxfilepathname)
	if err != nil {
		return fmt.Sprintf("Cant open xlsx file: %s", err)
	}

	for sheet, xlsxSheet := range xlsxFile.Sheets {
		xlsSheet := xlsFile.GetSheet(sheet)
		if xlsSheet == nil {
			return "Cant get xls sheet"
		}
		for row, xlsxRow := range xlsxSheet.Rows {
			// Row can return nil for a row the xls sheet does not hold.
			// NOTE(review): a nil row is treated as a row of empty cells
			// rather than being dereferenced — confirm this matches the
			// intended meaning of a missing row.
			xlsRow := xlsSheet.Row(row)
			for cell, xlsxCell := range xlsxRow.Cells {
				xlsxText := xlsxCell.String()
				var xlsText string
				if xlsRow != nil {
					xlsText = xlsRow.Col(cell)
				}
				if xlsText == xlsxText {
					continue
				}

				// The texts differ; they may still be two renderings of the
				// same number, so try a numeric comparison.
				xlsFloat, xlsErr := strconv.ParseFloat(xlsText, 64)
				xlsxFloat, xlsxErr := strconv.ParseFloat(xlsxText, 64)
				if xlsErr != nil || xlsxErr != nil {
					return fmt.Sprintf("sheet:%d, row/col: %d/%d, xlsx: (%s)[%d], xls: (%s)[%d].",
						sheet, row, cell, xlsxText, len(xlsxText),
						xlsText, len(xlsText))
				}

				// Tolerate insignificant differences between the numbers.
				if diff := math.Abs(xlsFloat - xlsxFloat); diff > 0.0000001 {
					return fmt.Sprintf("sheet:%d, row/col: %d/%d, xlsx: (%s)[%d], xls: (%s)[%d], numbers difference: %f.",
						sheet, row, cell, xlsxText, len(xlsxText),
						xlsText, len(xlsText), diff)
				}
			}
		}
	}

	return ""
}

30
issue47_test.go Normal file
View File

@ -0,0 +1,30 @@
package xls
import (
"io/ioutil"
"path"
"path/filepath"
"strings"
"testing"
)
// TestIssue47 scans the testdata directory and, for every entry whose
// extension is ".xls", compares it with the file of the same base name and
// the ".xlsx" extension using CompareXlsXlsx. The test stops at the first
// pair that CompareXlsXlsx reports as different.
func TestIssue47(t *testing.T) {
	const dir = "testdata"

	entries, err := ioutil.ReadDir(dir)
	if err != nil {
		t.Fatalf("Cant read testdata directory contents: %s", err)
	}

	for _, entry := range entries {
		xlsName := entry.Name()
		ext := filepath.Ext(xlsName)
		if ext != ".xls" {
			continue
		}

		xlsxName := strings.TrimSuffix(xlsName, ext) + ".xlsx"

		// CompareXlsXlsx reports problems as a non-empty string.
		report := CompareXlsXlsx(path.Join(dir, xlsName), path.Join(dir, xlsxName))
		if report != "" {
			t.Fatalf("XLS file %s an XLSX file are not equal: %s", xlsName, report)
		}
	}
}

BIN
testdata/bigtable.xls vendored Normal file

Binary file not shown.

BIN
testdata/bigtable.xlsx vendored Normal file

Binary file not shown.

BIN
testdata/float.xls vendored Normal file

Binary file not shown.

BIN
testdata/float.xlsx vendored Normal file

Binary file not shown.

BIN
testdata/issue47.xls vendored Normal file

Binary file not shown.

BIN
testdata/issue47.xlsx vendored Normal file

Binary file not shown.

BIN
testdata/negatives.xls vendored Normal file

Binary file not shown.

BIN
testdata/negatives.xlsx vendored Normal file

Binary file not shown.

BIN
testdata/superstore.xls vendored Normal file

Binary file not shown.

BIN
testdata/superstore.xlsx vendored Normal file

Binary file not shown.

BIN
testdata/times.xls vendored Normal file

Binary file not shown.

BIN
testdata/times.xlsx vendored Normal file

Binary file not shown.

View File

@ -97,10 +97,8 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int
} }
for err == nil && offset_pre < len(wb.sst) { for err == nil && offset_pre < len(wb.sst) {
var str string var str string
if size > 0 { str, err = wb.get_string(buf_item, size)
str, err = wb.get_string(buf_item, size) wb.sst[offset_pre] = wb.sst[offset_pre] + str
wb.sst[offset_pre] = wb.sst[offset_pre] + str
}
if err == io.EOF { if err == io.EOF {
break break
@ -119,9 +117,12 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int
wb.sst = make([]string, info.Count) wb.sst = make([]string, info.Count)
var size uint16 var size uint16
var i = 0 var i = 0
// dont forget to initialize offset
offset = 0
for ; i < int(info.Count); i++ { for ; i < int(info.Count); i++ {
var err error var err error
if err = binary.Read(buf_item, binary.LittleEndian, &size); err == nil { err = binary.Read(buf_item, binary.LittleEndian, &size)
if err == nil {
var str string var str string
str, err = wb.get_string(buf_item, size) str, err = wb.get_string(buf_item, size)
wb.sst[i] = wb.sst[i] + str wb.sst[i] = wb.sst[i] + str
@ -132,7 +133,7 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int
} }
} }
offset = i offset = i
case 0x85: // bOUNDSHEET case 0x85: // boundsheet
var bs = new(boundsheet) var bs = new(boundsheet)
binary.Read(buf_item, binary.LittleEndian, bs) binary.Read(buf_item, binary.LittleEndian, bs)
// different for BIFF5 and BIFF8 // different for BIFF5 and BIFF8
@ -195,11 +196,19 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e
for ; i < size && err == nil; i++ { for ; i < size && err == nil; i++ {
err = binary.Read(buf, binary.LittleEndian, &bts[i]) err = binary.Read(buf, binary.LittleEndian, &bts[i])
} }
runes := utf16.Decode(bts[:i])
// when eof found, we dont want to append last element
var runes []rune
if err == io.EOF {
i = i - 1
}
runes = utf16.Decode(bts[:i])
res = string(runes) res = string(runes)
if i < size { if i < size {
w.continue_utf16 = size - i + 1 w.continue_utf16 = size - i
} }
} else { } else {
var bts = make([]byte, size) var bts = make([]byte, size)
var n int var n int

View File

@ -3,7 +3,6 @@ package xls
import ( import (
"fmt" "fmt"
"testing" "testing"
"unicode/utf16"
) )
func TestOpen(t *testing.T) { func TestOpen(t *testing.T) {
@ -27,61 +26,3 @@ func TestOpen(t *testing.T) {
} }
} }
} }
func TestEuropeString(t *testing.T) {
bts := []byte{66, 233, 114, 232}
var bts1 = make([]uint16, 4)
for k, v := range bts {
bts1[k] = uint16(v)
}
runes := utf16.Decode(bts1)
fmt.Println(string(runes))
}
// func TestOpen1(t *testing.T) {
// xlFile, _ := Open("000.xls", "")
// for i := 0; i < xlFile.NumSheets(); i++ {
// fmt.Println(xlFile.GetSheet(i).Name)
// sheet := xlFile.GetSheet(i)
// row := sheet.Row(1]
// for i, col := range row.Cols {
// fmt.Println(i, col.String(xlFile))
// }
// }
// // sheet1 := xlFile.GetSheet(0)
// // fmt.Println(sheet1.Name)
// // fmt.Print(sheet1.Row()
// // for k, row1 := range sheet1.Row({
// // // row1 := sheet1.Row(1]
// // fmt.Printf("\n[%d]", k)
// // for _, col1 := range row1.Cols {
// // // col1 := row1.Cols[0]
// // fmt.Print(col1.LastCol())
// // fmt.Print(" ")
// // }
// // }
// }
// func TestBof(t *testing.T) {
// b := new(bof)
// b.Id = 0x41E
// b.Size = 55
// buf := bytes.NewReader([]byte{0x07, 0x00, 0x19, 0x00, 0x01, 0x22, 0x00, 0xE5, 0xFF, 0x22, 0x00, 0x23, 0x00, 0x2C, 0x00, 0x23, 0x00, 0x23, 0x00, 0x30, 0x00, 0x2E, 0x00, 0x30, 0x00, 0x30, 0x00, 0x3B, 0x00, 0x22, 0x00, 0xE5, 0xFF, 0x22, 0x00, 0x5C, 0x00, 0x2D, 0x00, 0x23, 0x00, 0x2C, 0x20, 0x00})
// wb := new(WorkBook)
// wb.Formats = make(map[uint16]*Format)
// wb.parseBof(buf, b, b, 0)
// }
// func TestMaxRow(t *testing.T) {
// xlFile, err := Open("Table.xls", "utf-8")
// if err != nil {
// fmt.Fprintf(os.Stderr, "Failure: %v\n", err)
// t.Error(err)
// }
// if sheet1 := xlFile.GetSheet(0); sheet1 != nil {
// if sheet1.MaxRow != 11 {
// t.Errorf("max row is error,is %d instead of 11", sheet1.MaxRow)
// }
// }
// }