Skip to content

Commit 0e95035

Browse files
authored
Merge pull request #507 from rusq/i501
Ignore invalid JSON for export and dump sources
2 parents c59eb6d + 2e3042b commit 0e95035

File tree

13 files changed

+310
-85
lines changed

13 files changed

+310
-85
lines changed

cmd/slackdump/internal/convertcmd/to_export.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ func toExport(ctx context.Context, src, trg string, cflg convertflags) error {
3030
defer fsa.Close()
3131

3232
// output storage
33-
sttFn, ok := source.StorageTypeFuncs[cflg.outStorageType]
33+
sttFn, ok := cflg.outStorageType.Func()
3434
if !ok {
3535
return ErrStorage
3636
}

internal/chunk/directory.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,11 @@ func (d *Directory) Walk(fn func(name string, f *File, err error) error) error {
249249
isDir = de.IsDir()
250250
isHidden = len(de.Name()) > 0 && de.Name()[0] == '.'
251251
)
252-
if !isSupported || isDir || isHidden {
252+
if isDir && path != d.dir {
253+
// skip nested directories
254+
return fs.SkipDir
255+
}
256+
if !isSupported || isHidden {
253257
return nil
254258
}
255259
f, err := d.openRAW(path)

internal/chunk/directory_test.go

Lines changed: 76 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
package chunk
22

33
import (
4-
"bytes"
5-
"compress/gzip"
6-
"encoding/json"
4+
"errors"
75
"io/fs"
8-
"os"
6+
"strings"
97
"testing"
108
"testing/fstest"
119

1210
"github.com/rusq/slack"
11+
"github.com/stretchr/testify/assert"
1312

1413
"github.com/rusq/slackdump/v3/internal/fixtures"
14+
"github.com/rusq/slackdump/v3/internal/testutil"
1515
)
1616

1717
// assortment of channel info chunks
@@ -70,46 +70,8 @@ var (
7070
}
7171
)
7272

73-
func TestOpenDir(t *testing.T) {
74-
}
75-
7673
func TestDirectory_Walk(t *testing.T) {
77-
var (
78-
// prepDir prepares a temporary directory for testing and populates it with
79-
// files from fsys. It returns the path to the directory.
80-
prepDir = func(t *testing.T, fsys fs.FS) string {
81-
t.Helper()
82-
dir := t.TempDir()
83-
if err := os.CopyFS(dir, fsys); err != nil {
84-
t.Fatal(err)
85-
}
86-
return dir
87-
}
88-
89-
compress = func(t *testing.T, data []byte) []byte {
90-
t.Helper()
91-
var buf bytes.Buffer
92-
gz := gzip.NewWriter(&buf)
93-
defer gz.Close()
94-
if _, err := gz.Write(data); err != nil {
95-
t.Fatal(err)
96-
}
97-
if err := gz.Close(); err != nil {
98-
t.Fatal(err)
99-
}
100-
return buf.Bytes()
101-
}
102-
103-
marshal = func(t *testing.T, v any) []byte {
104-
t.Helper()
105-
data, err := json.Marshal(v)
106-
if err != nil {
107-
t.Fatal(err)
108-
}
109-
return data
110-
}
111-
)
112-
74+
fixtures.SkipOnWindows(t) // TODO: fix this test on Windows
11375
testChannels := fixtures.Load[[]slack.Channel](fixtures.TestChannelsJSON)
11476
channelInfos := make([]Chunk, len(testChannels))
11577
for _, ch := range testChannels {
@@ -123,43 +85,81 @@ func TestDirectory_Walk(t *testing.T) {
12385
t.Fatal("fixture has no channels")
12486
}
12587

126-
t.Run("doesn't fail on invalid json", func(t *testing.T) {
127-
testdir := fstest.MapFS{
128-
"C123VALID.json.gz": &fstest.MapFile{
129-
Data: compress(t, marshal(t, channelInfos[0])),
88+
tests := []struct {
89+
name string
90+
fsys fs.FS
91+
want []string
92+
wantErr bool
93+
}{
94+
{
95+
name: "invalid json in root",
96+
fsys: fstest.MapFS{
97+
"C123VALID.json.gz": &fstest.MapFile{
98+
Data: testutil.GZCompress(t, testutil.MarshalJSON(t, channelInfos[0])),
99+
},
100+
"C123INVALID.json.gz": &fstest.MapFile{
101+
Data: testutil.GZCompress(t, []byte("invalid json")),
102+
},
103+
"C123VALID2.json.gz": &fstest.MapFile{
104+
Data: testutil.GZCompress(t, testutil.MarshalJSON(t, channelInfos[1])),
105+
},
130106
},
131-
"C123INVALID.json.gz": &fstest.MapFile{
132-
Data: compress(t, []byte("invalid json")),
107+
want: []string{
108+
"C123VALID.json.gz",
109+
"C123VALID2.json.gz",
133110
},
134-
"C123VALID2.json.gz": &fstest.MapFile{
135-
Data: compress(t, marshal(t, channelInfos[1])),
111+
},
112+
{
113+
name: "should scan only top level dir",
114+
fsys: fstest.MapFS{
115+
"__uploads/CINVALID.json.gz": &fstest.MapFile{
116+
Data: testutil.GZCompress(t, []byte("NaN")),
117+
},
118+
"__uploads/CVALID.json.gz": &fstest.MapFile{
119+
Data: testutil.GZCompress(t, testutil.MarshalJSON(t, channelInfos[1])),
120+
},
121+
"__avatars/CVALID.json.gz": &fstest.MapFile{
122+
Data: testutil.GZCompress(t, testutil.MarshalJSON(t, channelInfos[2])),
123+
},
124+
"somedir/CVALID.json.gz": &fstest.MapFile{
125+
Data: testutil.GZCompress(t, testutil.MarshalJSON(t, channelInfos[3])),
126+
},
127+
"CVALID.json.gz": &fstest.MapFile{
128+
Data: testutil.GZCompress(t, testutil.MarshalJSON(t, channelInfos[0])),
129+
},
130+
"CANOTHER.json.gz": &fstest.MapFile{
131+
Data: testutil.GZCompress(t, testutil.MarshalJSON(t, channelInfos[1])),
132+
},
136133
},
137-
}
138-
139-
dir := prepDir(t, testdir)
140-
d, err := OpenDir(dir)
141-
if err != nil {
142-
t.Fatalf("OpenDir() error = %v", err)
143-
}
144-
defer d.Close()
145-
var seen []string
146-
if err := d.Walk(func(name string, f *File, err error) error {
134+
want: []string{
135+
"CVALID.json.gz",
136+
"CANOTHER.json.gz",
137+
},
138+
},
139+
}
140+
for _, tt := range tests {
141+
t.Run(tt.name, func(t *testing.T) {
142+
dir := testutil.PrepareTestDirectory(t, tt.fsys)
143+
d, err := OpenDir(dir)
147144
if err != nil {
148-
t.Fatalf("Walk() error = %v", err)
145+
t.Fatalf("OpenDir() error = %v", err)
149146
}
150-
if name == "C123INVALID.json.gz" {
151-
t.Fatal("should not be called for invalid json")
147+
defer d.Close()
148+
var seen []string
149+
if err := d.Walk(func(name string, f *File, err error) error {
150+
if err != nil {
151+
return err
152+
}
153+
if f == nil {
154+
return errors.New("file is nil")
155+
}
156+
t.Logf("name: %q, trimmed: %q", name, strings.TrimLeft(name, dir))
157+
seen = append(seen, strings.TrimLeft(name, dir))
158+
return nil
159+
}); (err != nil) != tt.wantErr {
160+
t.Fatalf("Walk() wantErr: %v, got error = %v", tt.wantErr, err)
152161
}
153-
if f == nil {
154-
t.Fatalf("Walk() file is nil")
155-
}
156-
seen = append(seen, name)
157-
return nil
158-
}); err != nil {
159-
t.Fatalf("Walk() error = %v", err)
160-
}
161-
if len(seen) != 2 {
162-
t.Fatalf("Walk() = %v, want 2", len(seen))
163-
}
164-
})
162+
assert.ElementsMatch(t, tt.want, seen)
163+
})
164+
}
165165
}

internal/testutil/compress.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package testutil
2+
3+
import (
4+
"bytes"
5+
"compress/gzip"
6+
"testing"
7+
)
8+
9+
// GZCompress compresses data using gzip and returns the compressed data.
10+
func GZCompress(t *testing.T, data []byte) []byte {
11+
t.Helper()
12+
var buf bytes.Buffer
13+
gz := gzip.NewWriter(&buf)
14+
defer gz.Close()
15+
if _, err := gz.Write(data); err != nil {
16+
t.Fatal(err)
17+
}
18+
if err := gz.Close(); err != nil {
19+
t.Fatal(err)
20+
}
21+
return buf.Bytes()
22+
}

internal/testutil/fs.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package testutil
22

33
import (
44
"io/fs"
5+
"os"
56
"testing"
67
)
78

@@ -32,3 +33,14 @@ func CollectFiles(t *testing.T, fsys fs.FS) (ret map[string]FileInfo) {
3233
}
3334
return
3435
}
36+
37+
// PrepareTestDirectory prepares a temporary directory for testing and populates it with
38+
// files from fsys. It returns the path to the directory.
39+
func PrepareTestDirectory(t *testing.T, fsys fs.FS) string {
40+
t.Helper()
41+
dir := t.TempDir()
42+
if err := os.CopyFS(dir, fsys); err != nil {
43+
t.Fatal(err)
44+
}
45+
return dir
46+
}

internal/testutil/json.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"testing"
66
)
77

8+
// MarshalJSON marshals data to JSON and returns the byte slice.
89
func MarshalJSON(t *testing.T, v any) []byte {
910
t.Helper()
1011
b, err := json.Marshal(v)

source/dump.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package source
22

33
import (
44
"context"
5+
"encoding/json"
56
"errors"
67
"io/fs"
78
"iter"
@@ -25,6 +26,14 @@ type Dump struct {
2526
files Storage
2627
}
2728

29+
// OpenDump opens the data in dump format (Slackdump v1.1.0+) from filesystem fsys, and the given
30+
// name. It will scan for file attachments.
31+
//
32+
// If you need to open a dump from Slackdump pre-v1.1.0, convert it first, with the following command:
33+
//
34+
// slackdump tools convertv1
35+
//
36+
// Note: slackdump pre-v1.1.0 dumps do not have threads.
2837
func OpenDump(ctx context.Context, fsys fs.FS, name string) (*Dump, error) {
2938
var st Storage = NoStorage{}
3039
if fst, err := NewDumpStorage(fsys); err == nil {
@@ -78,6 +87,11 @@ func (d Dump) Channels(context.Context) ([]slack.Channel, error) {
7887

7988
c, err := unmarshalOne[types.Conversation](d.fs, pth)
8089
if err != nil {
90+
var jsonErr *json.SyntaxError
91+
if errors.As(err, &jsonErr) {
92+
slog.Debug("skipping file with invalid JSON", "file", pth, "error", err)
93+
return nil
94+
}
8195
return err
8296
}
8397
cc = append(cc, slack.Channel{

source/dump_test.go

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"io/fs"
66
"testing"
7+
"testing/fstest"
78

89
"github.com/rusq/slack"
910
"github.com/stretchr/testify/assert"
@@ -69,6 +70,44 @@ func TestDump_Channels(t *testing.T) {
6970
},
7071
wantErr: false,
7172
},
73+
{
74+
name: "skips invalid json",
75+
fields: fields{
76+
fs: fstest.MapFS{
77+
"C12345678.json": &fstest.MapFile{ // invalid JSON
78+
Data: []byte("{invalid}"),
79+
},
80+
"G12345678.json": &fstest.MapFile{ // valid JSON
81+
Data: []byte(`{"channel_id":"G12345678","name":"test-group"}`), // note: dump format
82+
},
83+
"C12345679.json": &fstest.MapFile{
84+
Data: []byte(`{"channel_id":"C12345679","name":"public"}`),
85+
},
86+
},
87+
},
88+
args: args{
89+
in0: context.Background(),
90+
},
91+
want: []slack.Channel{
92+
{
93+
GroupConversation: slack.GroupConversation{
94+
Conversation: slack.Conversation{
95+
ID: "G12345678",
96+
},
97+
Name: "test-group",
98+
},
99+
},
100+
{
101+
GroupConversation: slack.GroupConversation{
102+
Conversation: slack.Conversation{
103+
ID: "C12345679",
104+
},
105+
Name: "public",
106+
},
107+
},
108+
},
109+
wantErr: false,
110+
},
72111
}
73112
for _, tt := range tests {
74113
t.Run(tt.name, func(t *testing.T) {
@@ -82,7 +121,7 @@ func TestDump_Channels(t *testing.T) {
82121
t.Errorf("Dump.Channels() error = %v, wantErr %v", err, tt.wantErr)
83122
return
84123
}
85-
assert.Equal(t, tt.want, got)
124+
assert.ElementsMatch(t, tt.want, got)
86125
})
87126
}
88127
}

source/export.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package source
22

33
import (
44
"context"
5+
"encoding/json"
56
"errors"
67
"fmt"
78
"io/fs"
@@ -135,12 +136,20 @@ func (e *Export) walkChannelMessages(channelID string) (iter.Seq2[slack.Message,
135136
if err != nil {
136137
return err
137138
}
138-
if d.IsDir() || path.Ext(pth) != ".json" {
139+
if d.IsDir() && pth != name {
140+
return fs.SkipDir
141+
}
142+
if path.Ext(pth) != ".json" {
139143
return nil
140144
}
141145
// read the file
142146
em, err := unmarshal[[]export.ExportMessage](e.fs, pth)
143147
if err != nil {
148+
var jsonErr *json.SyntaxError
149+
if errors.As(err, &jsonErr) {
150+
slog.Default().Debug("skipping a broken file", "pth", pth, "err", err)
151+
return nil
152+
}
144153
return err
145154
}
146155
for i, m := range em {

0 commit comments

Comments
 (0)