Skip to content

Commit 17d45ee

Browse files
committed
Finishing touches
Added FastOptions as an alternative to DefaultOptions. A few performance improvements. A few bug fixes.
1 parent d9132ab commit 17d45ee

File tree

9 files changed

+310
-63
lines changed

9 files changed

+310
-63
lines changed

README.md

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,27 @@ Currently has support for reading squashfs files and extracting files and folder
1111
Special thanks to <https://dr-emann.github.io/squashfs/> for some VERY important information in an easy to understand format.
1212
Thanks also to [distri's squashfs library](https://github.com/distr1/distri/tree/master/internal/squashfs) as I referenced it to figure some things out (and double check others).
1313

14+
## FUSE
15+
16+
As of `v1.0`, FUSE capabilities have been moved to [a separate library](https://github.com/CalebQ42/squashfuse).
17+
1418
## Limitations
1519

16-
* No Xattr parsing. This is simply because I haven't done any research on it and how to apply these in a pure go way.
20+
* No Xattr parsing.
1721
* Socket files are not extracted.
18-
* From my research, it seems like a socket file would be useless if it could be created. They are still exposed when fuse mounted.
22+
* From my research, it seems like a socket file would be useless if it could be created.
1923
* Fifo files are ignored on `darwin`
2024

2125
## Issues
2226

23-
* Significantly slower then `unsquashfs` when extracting folders (about 5 ~ 7 times slower on a ~100MB archive using zstd compression)
27+
* Significantly slower than `unsquashfs` when extracting folders
2428
* This seems to be related to the above along with the general optimization of `unsquashfs` and its compression libraries.
25-
* The larger the file's tree, the slower the extraction will be. Arch Linux's Live USB's airootfs.sfs takes ~35x longer for a full extraction.
29+
* Times seem to be largely dependent on file tree size and compression type.
30+
* My main testing image (~100MB) using Zstd takes about 6x longer.
31+
* An Arch Linux airootfs image (~780MB) using XZ compression with LZMA filters takes about 32x longer.
32+
* A Tensorflow docker image (~3.3GB) using Zstd takes about 12x longer.
33+
34+
Note: These numbers are using `FastOptions()`. `DefaultOptions()` takes about 2x longer.
2635

2736
## Recommendations on Usage
2837

extraction_options.go

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,28 +3,47 @@ package squashfs
33
import (
44
"io"
55
"io/fs"
6-
"os"
6+
"runtime"
77

88
"github.com/CalebQ42/squashfs/internal/routinemanager"
99
)
1010

1111
type ExtractionOptions struct {
1212
manager *routinemanager.Manager
13-
LogOutput io.Writer //Where the verbose log should write. Defaults to os.Stdout.
13+
LogOutput io.Writer //Where the verbose log should write.
1414
DereferenceSymlink bool //Replace symlinks with the target file.
1515
UnbreakSymlink bool //Try to make sure symlinks remain unbroken when extracted, without changing the symlink.
1616
Verbose bool //Prints extra info to log on an error.
1717
IgnorePerm bool //Ignore file's permissions and instead use Perm.
1818
Perm fs.FileMode //Permission to use when IgnorePerm. Defaults to 0777.
19-
SimultaneousFiles uint16 //Number of files to process in parallel. Defaults to 10.
20-
ExtractionRoutines uint16 //Number of goroutines to use for each file's extraction. Only applies to regular files. Defaults to 10.
19+
SimultaneousFiles uint16 //Number of files to process in parallel. Default set based on runtime.NumCPU().
20+
ExtractionRoutines uint16 //Number of goroutines to use for each file's extraction. Only applies to regular files. Default set based on runtime.NumCPU().
2121
}
2222

23+
// The default extraction options.
2324
func DefaultOptions() *ExtractionOptions {
25+
cores := uint16(runtime.NumCPU() / 2)
26+
var files, routines uint16
27+
if cores <= 4 {
28+
files = 1
29+
routines = cores
30+
} else {
31+
files = cores - 4
32+
routines = 4
33+
}
34+
return &ExtractionOptions{
35+
Perm: 0777,
36+
SimultaneousFiles: files,
37+
ExtractionRoutines: routines,
38+
}
39+
}
40+
41+
// Less limited default options. Can run up to 2x faster than DefaultOptions.
42+
// Tends to use all available CPU resources.
43+
func FastOptions() *ExtractionOptions {
2444
return &ExtractionOptions{
25-
LogOutput: os.Stdout,
2645
Perm: 0777,
27-
SimultaneousFiles: 10,
28-
ExtractionRoutines: 10,
46+
SimultaneousFiles: uint16(runtime.NumCPU()),
47+
ExtractionRoutines: uint16(runtime.NumCPU()),
2948
}
3049
}

file.go

Lines changed: 50 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,15 @@ type File struct {
2727
dirsRead int
2828
}
2929

30+
// Creates a new *File from the given *squashfs.Base
31+
func (r *Reader) FileFromBase(b *squashfs.Base, parent *FS) *File {
32+
return &File{
33+
b: b,
34+
parent: parent,
35+
r: r,
36+
}
37+
}
38+
3039
func (f *File) FS() (*FS, error) {
3140
if !f.IsDir() {
3241
return nil, errors.New("not a directory")
@@ -179,6 +188,9 @@ func (f *File) deviceDevices() (maj uint32, min uint32) {
179188
}
180189

181190
func (f *File) path() string {
191+
if f.parent == nil {
192+
return f.b.Name
193+
}
182194
return filepath.Join(f.parent.path(), f.b.Name)
183195
}
184196

@@ -193,7 +205,16 @@ func (f *File) Extract(folder string) error {
193205
func (f *File) ExtractWithOptions(path string, op *ExtractionOptions) error {
194206
if op.manager == nil {
195207
op.manager = routinemanager.NewManager(op.SimultaneousFiles)
196-
log.SetOutput(op.LogOutput)
208+
if op.LogOutput != nil {
209+
log.SetOutput(op.LogOutput)
210+
}
211+
err := os.MkdirAll(path, 0777)
212+
if err != nil {
213+
if op.Verbose {
214+
log.Println("Failed to create initial directory", path)
215+
}
216+
return err
217+
}
197218
}
198219
switch f.b.Inode.Type {
199220
case inode.Dir, inode.EDir:
@@ -205,7 +226,6 @@ func (f *File) ExtractWithOptions(path string, op *ExtractionOptions) error {
205226
return errors.Join(errors.New("failed to create squashfs.Directory: "+path), err)
206227
}
207228
errChan := make(chan error, len(d.Entries))
208-
files := len(d.Entries)
209229
for i := range d.Entries {
210230
b, err := f.r.r.BaseFromEntry(d.Entries[i])
211231
if err != nil {
@@ -214,37 +234,39 @@ func (f *File) ExtractWithOptions(path string, op *ExtractionOptions) error {
214234
}
215235
return errors.Join(errors.New("failed to get base from entry: "+path), err)
216236
}
217-
if b.IsDir() {
218-
files--
219-
extDir := filepath.Join(path, b.Name)
220-
err = os.Mkdir(extDir, 0777)
221-
if err != nil {
222-
if op.Verbose {
223-
log.Println("Failed to create directory", path)
237+
go func(b *squashfs.Base, path string) {
238+
i := op.manager.Lock()
239+
if b.IsDir() {
240+
extDir := filepath.Join(path, b.Name)
241+
err = os.Mkdir(extDir, 0777)
242+
op.manager.Unlock(i)
243+
if err != nil {
244+
if op.Verbose {
245+
log.Println("Failed to create directory", path)
246+
}
247+
errChan <- errors.Join(errors.New("failed to create directory: "+path), err)
248+
return
224249
}
225-
return errors.Join(errors.New("failed to create directory: "+path), err)
226-
}
227-
err = f.ExtractWithOptions(extDir, op)
228-
if err != nil {
229-
if op.Verbose {
230-
log.Println("Failed to extract directory", path)
250+
err = f.r.FileFromBase(b, f.r.FSFromDirectory(d, f.parent)).ExtractWithOptions(extDir, op)
251+
if err != nil {
252+
if op.Verbose {
253+
log.Println("Failed to extract directory", path)
254+
}
255+
errChan <- errors.Join(errors.New("failed to extract directory: "+path), err)
256+
return
231257
}
232-
return errors.Join(errors.New("failed to extract directory: "+path), err)
258+
errChan <- nil
259+
} else {
260+
fil := f.r.FileFromBase(b, f.r.FSFromDirectory(d, f.parent))
261+
err = fil.ExtractWithOptions(path, op)
262+
op.manager.Unlock(i)
263+
fil.Close()
264+
errChan <- err
233265
}
234-
} else {
235-
fil := &File{
236-
b: b,
237-
r: f.r,
238-
}
239-
go func(fil *File, folder string) {
240-
i := op.manager.Lock()
241-
defer op.manager.Unlock(i)
242-
errChan <- fil.ExtractWithOptions(folder, op)
243-
}(fil, path)
244-
}
266+
}(b, path)
245267
}
246268
var errCache []error
247-
for i := 0; i < files; i++ {
269+
for i := 0; i < len(d.Entries); i++ {
248270
err := <-errChan
249271
if err != nil {
250272
errCache = append(errCache, err)
@@ -278,9 +300,6 @@ func (f *File) ExtractWithOptions(path string, op *ExtractionOptions) error {
278300
}
279301
return errors.Join(errors.New("failed to write file: "+path), err)
280302
}
281-
if op.Verbose {
282-
log.Println(f.path(), "extracted to", path)
283-
}
284303
case inode.Sym, inode.ESym:
285304
symPath := f.SymlinkPath()
286305
if op.DereferenceSymlink {

fs.go

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@ type FS struct {
2020
parent *FS
2121
}
2222

23+
// Creates a new *FS from the given *squashfs.Directory
24+
func (r *Reader) FSFromDirectory(d *squashfs.Directory, parent *FS) *FS {
25+
return &FS{
26+
d: d,
27+
r: r,
28+
parent: parent,
29+
}
30+
}
31+
2332
// Glob returns the name of the files at the given pattern.
2433
// All paths are relative to the FS.
2534
// Uses filepath.Match to compare names.
@@ -101,9 +110,9 @@ func (f *FS) Open(name string) (fs.File, error) {
101110
Path: name,
102111
Err: fs.ErrNotExist,
103112
}
113+
} else {
114+
return f.parent.Open(strings.Join(split[1:], "/"))
104115
}
105-
} else {
106-
return f.parent.Open(strings.Join(split[1:], "/"))
107116
}
108117
i, found := slices.BinarySearchFunc(f.d.Entries, split[0], func(e directory.Entry, name string) int {
109118
return strings.Compare(e.Name, name)
@@ -137,11 +146,7 @@ func (f *FS) Open(name string) (fs.File, error) {
137146
if err != nil {
138147
return nil, err
139148
}
140-
return (&FS{
141-
d: d,
142-
r: f.r,
143-
parent: f,
144-
}).Open(strings.Join(split[1:], "/"))
149+
return f.r.FSFromDirectory(d, f).Open(strings.Join(split[1:], "/"))
145150
}
146151

147152
// Returns all DirEntry's for the directory at name.

reader.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,14 @@ func NewReader(r io.ReaderAt) (*Reader, error) {
1717
if err != nil {
1818
return nil, err
1919
}
20-
return &Reader{
20+
out := &Reader{
2121
r: rdr,
22-
FS: &FS{
23-
d: rdr.Root,
24-
},
25-
}, nil
22+
}
23+
out.FS = &FS{
24+
d: rdr.Root,
25+
r: out,
26+
}
27+
return out, nil
2628
}
2729

2830
func (r *Reader) ModTime() time.Time {

squashfs/data/fullreader.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"errors"
66
"io"
77
"math"
8+
"runtime"
89
"sync"
910

1011
"github.com/CalebQ42/squashfs/internal/decompress"
@@ -31,7 +32,7 @@ func NewFullReader(r io.ReaderAt, initialOffset int64, d decompress.Decompressor
3132
d: d,
3233
sizes: sizes,
3334
initialOffset: initialOffset,
34-
goroutineLimit: 10,
35+
goroutineLimit: uint16(runtime.NumCPU()),
3536
finalBlockSize: finalBlockSize,
3637
blockSize: blockSize,
3738
retPool: &sync.Pool{

squashfs/directory.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package squashfs
22

33
import (
44
"errors"
5-
"fmt"
65
"io/fs"
76
"path/filepath"
87
"slices"
@@ -22,7 +21,6 @@ type Directory struct {
2221
func (r *Reader) directoryFromRef(ref uint64, name string) (*Directory, error) {
2322
i, err := r.InodeFromRef(ref)
2423
if err != nil {
25-
fmt.Println("yo")
2624
return nil, err
2725
}
2826
var blockStart uint32

squashfs/reader_test.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package squashfs
1+
package squashfs_test
22

33
import (
44
"fmt"
@@ -8,6 +8,8 @@ import (
88
"os/exec"
99
"path/filepath"
1010
"testing"
11+
12+
"github.com/CalebQ42/squashfs/squashfs"
1113
)
1214

1315
const (
@@ -55,7 +57,7 @@ func TestReader(t *testing.T) {
5557
t.Fatal(err)
5658
}
5759
defer fil.Close()
58-
rdr, err := NewReader(fil)
60+
rdr, err := squashfs.NewReader(fil)
5961
if err != nil {
6062
t.Fatal(err)
6163
}
@@ -75,7 +77,7 @@ func TestSingleFile(t *testing.T) {
7577
t.Fatal(err)
7678
}
7779
defer fil.Close()
78-
rdr, err := NewReader(fil)
80+
rdr, err := squashfs.NewReader(fil)
7981
if err != nil {
8082
t.Fatal(err)
8183
}
@@ -90,7 +92,7 @@ func TestSingleFile(t *testing.T) {
9092
t.Fatal(err)
9193
}
9294

93-
func extractToDir(rdr *Reader, b *Base, folder string) error {
95+
func extractToDir(rdr *squashfs.Reader, b *squashfs.Base, folder string) error {
9496
path := filepath.Join(folder, b.Name)
9597
if b.IsDir() {
9698
d, err := b.ToDir(rdr)
@@ -101,7 +103,7 @@ func extractToDir(rdr *Reader, b *Base, folder string) error {
101103
if err != nil {
102104
return err
103105
}
104-
var nestBast *Base
106+
var nestBast *squashfs.Base
105107
for _, e := range d.Entries {
106108
nestBast, err = rdr.BaseFromEntry(e)
107109
if err != nil {
@@ -115,7 +117,6 @@ func extractToDir(rdr *Reader, b *Base, folder string) error {
115117
} else if b.IsRegular() {
116118
_, full, err := b.GetRegFileReaders(rdr)
117119
if err != nil {
118-
fmt.Println("yo", path)
119120
return err
120121
}
121122
fil, err := os.Create(path)

0 commit comments

Comments
 (0)