Browse Source

Initial commit

Ryan Armstrong 4 years ago
commit
c78c931ab9
5 changed files with 332 additions and 0 deletions
  1. 34 0
      README.md
  2. 161 0
      client.go
  3. 20 0
      client_test.go
  4. 72 0
      download.go
  5. 45 0
      error.go

+ 34 - 0
README.md

@@ -0,0 +1,34 @@
+# grab
+
+__Downloading the internet, one go routine at a time!__
+
+Grab is a Go package for downloading files from the internet with the following
+rad features:
+
+* Monitor download progress asynchronously
+* Auto-resume incomplete downloads
+* Deduce filename from content header or URL
+* Safely cancel downloads
+* Validate downloads using checksums
+* Download batches of files asynchronously
+
+## License
+
+Copyright (c) 2015 Ryan Armstrong
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+ 161 - 0
client.go

@@ -0,0 +1,161 @@
+package grab
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"path"
+	"path/filepath"
+	"sync/atomic"
+)
+
+// Client issues the HTTP requests needed to carry out Download operations.
+type Client struct {
+	// client is the underlying HTTP client through which every request is
+	// sent.
+	client *http.Client
+
+	// userAgent, when non-empty, is set as the User-Agent header of a
+	// Download's request before it is sent.
+	userAgent string
+}
+
+// NewClient returns a Client which identifies itself with the given user agent
+// string. Its HTTP client uses a transport whose proxy is resolved with
+// http.ProxyFromEnvironment.
+func NewClient(userAgent string) *Client {
+	transport := &http.Transport{
+		Proxy: http.ProxyFromEnvironment,
+	}
+
+	c := &Client{userAgent: userAgent}
+	c.client = &http.Client{Transport: transport}
+	return c
+}
+
+// SetHTTPClient replaces the HTTP client used to send requests.
+//
+// A nil client is not stored, because it would make every later call to Do
+// panic on a nil pointer; http.DefaultClient is used in its place.
+func (c *Client) SetHTTPClient(client *http.Client) {
+	if client == nil {
+		client = http.DefaultClient
+	}
+	c.client = client
+}
+
+// Do executes the given Download, writing the response body to its destination
+// file and advancing its progress counter as bytes are written.
+//
+// An empty destination means the current working directory. If the
+// destination is a directory, the file name is taken from the last element of
+// the URL path. A destination which does not exist yet is created.
+//
+// A HEAD request is sent first, on a best-effort basis, to learn the content
+// length and whether the server accepts byte ranges. If it does and the
+// destination file already holds data, the download resumes from the end of
+// that file. If it does not, existing data is discarded before downloading,
+// unless the file already has exactly the expected size, in which case Do
+// returns without downloading anything.
+//
+// Do returns an error for which IsContentLengthMismatch is true when the
+// server reports a content length other than the expected one, an error for
+// which IsNoFilename is true when no file name can be derived from the URL,
+// an error when the server answers the download request with a non-2xx
+// status, and otherwise any error from the file system or the HTTP client.
+func (c *Client) Do(d *Download) error {
+	// default to current working directory
+	if d.filepath == "" {
+		d.filepath = "."
+	}
+
+	// see if the destination is a directory. A destination which does not
+	// exist yet is simply the file about to be created, not an error.
+	needFilename := false
+	if fi, err := os.Stat(d.filepath); err == nil {
+		needFilename = fi.IsDir()
+	} else if !os.IsNotExist(err) {
+		return err
+	}
+
+	// configure client request
+	if c.userAgent != "" {
+		d.req.Header.Set("User-Agent", c.userAgent)
+	}
+
+	// probe the server with a HEAD request. The original method is restored
+	// straight away so that no return path leaves the request altered.
+	method := d.req.Method
+	d.req.Method = "HEAD"
+	head, err := c.client.Do(d.req)
+	d.req.Method = method
+
+	// get file metadata. A failed probe is not fatal: the download is then
+	// attempted without resume support.
+	canResume := false
+	if err == nil {
+		// only the headers are of interest, but the body must still be closed
+		head.Body.Close()
+
+		if head.StatusCode >= 200 && head.StatusCode < 300 {
+			// update or validate content length
+			if head.ContentLength > 0 {
+				if d.size == 0 {
+					d.size = uint64(head.ContentLength)
+				} else if d.size != uint64(head.ContentLength) {
+					return errorf(errBadLength, "Bad content length: %d, expected %d", head.ContentLength, d.size)
+				}
+			}
+
+			// does the server support resuming downloads?
+			canResume = head.Header.Get("Accept-Ranges") == "bytes"
+
+			// TODO: get filename from Content-Disposition header
+		}
+	}
+
+	// compute filename from URL if still needed
+	if needFilename {
+		// path.Base never returns an empty string: it yields "." for an empty
+		// path and "/" for a path consisting only of slashes.
+		filename := path.Base(d.url.Path)
+		switch filename {
+		case "", ".", "..", "/":
+			return errorf(errNoFilename, "No filename could be determined")
+		}
+
+		// update filepath with filename from URL
+		d.filepath = filepath.Join(d.filepath, filename)
+	}
+
+	// open destination for writing. O_APPEND makes every write land at the
+	// end of the file, so no explicit seeking is needed.
+	f, err := os.OpenFile(d.filepath, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0644)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	fi, err := f.Stat()
+	if err != nil {
+		return err
+	}
+	existing := uint64(fi.Size())
+
+	// decide what to do with data already present in the destination
+	var progress uint64
+	d.req.Header.Del("Range")
+	switch {
+	case existing == 0:
+		// nothing on disk yet
+	case canResume:
+		// attempt to resume previous download: request the missing tail only
+		progress = existing
+		d.req.Header.Set("Range", fmt.Sprintf("bytes=%d-", existing))
+	case d.size > 0 && existing == d.size:
+		// file is already complete
+		progress = existing
+	default:
+		// cannot resume: drop the stale content, which the new download
+		// would otherwise be appended to
+		if err := f.Truncate(0); err != nil {
+			return err
+		}
+	}
+	atomic.StoreUint64(&d.progress, progress)
+
+	// skip if already downloaded
+	if d.size > 0 && d.size == progress {
+		return nil
+	}
+
+	// get file content
+	resp, err := c.client.Do(d.req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+
+	// never store an error page as if it were the requested file
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return fmt.Errorf("grab: unexpected response status: %s", resp.Status)
+	}
+
+	// the server ignored the byte range and is sending the whole file again
+	if progress > 0 && resp.StatusCode != http.StatusPartialContent {
+		if err := f.Truncate(0); err != nil {
+			return err
+		}
+		progress = 0
+		atomic.StoreUint64(&d.progress, 0)
+	}
+
+	// validate content length; a negative value means the length is unknown
+	if d.size > 0 && resp.ContentLength >= 0 && d.size != progress+uint64(resp.ContentLength) {
+		return errorf(errBadLength, "Bad content length: %d, expected %d", resp.ContentLength, int64(d.size)-int64(progress))
+	}
+
+	// download and update progress
+	var buffer [4096]byte
+	for {
+		// read HTTP stream
+		n, rerr := resp.Body.Read(buffer[:])
+
+		// a reader may return data together with an error, so the bytes read
+		// are written out before the error is considered
+		if n > 0 {
+			if _, werr := f.Write(buffer[:n]); werr != nil {
+				return werr
+			}
+			atomic.AddUint64(&d.progress, uint64(n))
+		}
+
+		// finish on end of stream, fail on anything else
+		if rerr == io.EOF {
+			return nil
+		}
+		if rerr != nil {
+			return rerr
+		}
+	}
+}

+ 20 - 0
client_test.go

@@ -0,0 +1,20 @@
+package grab
+
+import (
+	"bytes"
+	"io/ioutil"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"testing"
+	"time"
+)
+
+// TestClient_do downloads a small payload from a local test server into a
+// temporary directory and checks that the stored file matches the payload.
+// Nothing outside the test process is contacted and nothing is left behind in
+// the package directory.
+func TestClient_do(t *testing.T) {
+	content := []byte("grab test payload")
+
+	// http.ServeContent answers HEAD requests and advertises byte range
+	// support, which covers the metadata probe made by Client.Do
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		http.ServeContent(w, r, "payload.bin", time.Time{}, bytes.NewReader(content))
+	}))
+	defer ts.Close()
+
+	dir, err := ioutil.TempDir("", "grab")
+	if err != nil {
+		t.Fatalf("Error creating temporary directory: %v", err)
+	}
+	defer os.RemoveAll(dir)
+
+	client := NewClient("grab test")
+
+	d, err := NewDownload(dir, ts.URL+"/payload.bin", 0, nil, nil)
+	if err != nil {
+		t.Fatalf("Error initializing download: %v", err)
+	}
+
+	if err := client.Do(d); err != nil {
+		t.Fatalf("Error with download: %v", err)
+	}
+
+	got, err := ioutil.ReadFile(d.FilePath())
+	if err != nil {
+		t.Fatalf("Error reading downloaded file: %v", err)
+	}
+	if !bytes.Equal(got, content) {
+		t.Fatalf("Downloaded content is %q, expected %q", got, content)
+	}
+}

+ 72 - 0
download.go

@@ -0,0 +1,72 @@
+package grab
+
+import (
+	"hash"
+	"net/http"
+	"net/url"
+	"sync/atomic"
+)
+
+// Download defines a single file download operation with its source URL,
+// destination file path, progress and checksum information.
+type Download struct {
+	// url is the source URL; NewDownload takes it from req.
+	url      *url.URL
+	// req is the HTTP request sent by Client.Do to fetch the file.
+	req      *http.Request
+	// filepath is the destination. Client.Do treats an empty value as the
+	// current directory and appends a file name when it is a directory.
+	filepath string
+	// size is the expected total number of bytes; Client.Do fills it in from
+	// the reported content length when it is zero.
+	size     uint64
+	// progress counts the bytes downloaded so far. It is updated with
+	// sync/atomic operations while a download runs.
+	progress uint64
+	// algo is stored as given to NewDownload; the code in view never reads it.
+	algo     hash.Hash
+	// checksum is stored as given to NewDownload; the code in view never
+	// reads it.
+	checksum []byte
+}
+
+// Downloads is a slice of pointers to Download operations.
+type Downloads []*Download
+
+// NewDownload prepares a Download which fetches src with a GET request and
+// stores it at dst. The given size, algo and checksum are recorded on the
+// returned Download unchanged. An error is returned if no HTTP request can be
+// created for src.
+func NewDownload(dst, src string, size uint64, algo hash.Hash, checksum []byte) (*Download, error) {
+	// the request is built up front so that a malformed source is reported
+	// before any Download exists
+	request, err := http.NewRequest("GET", src, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	d := new(Download)
+	d.req = request
+	d.url = request.URL
+	d.filepath = dst
+	d.size = size
+	d.algo = algo
+	d.checksum = checksum
+	return d, nil
+}
+
+// URL returns the source URL of the download. The returned pointer is the one
+// held by the Download itself, not a copy.
+func (c *Download) URL() *url.URL {
+	return c.url
+}
+
+// FilePath returns the local file path where the download will be stored.
+// Until Client.Do has resolved the destination, this is the path given to
+// NewDownload, which may be empty or name a directory.
+func (c *Download) FilePath() string {
+	return c.filepath
+}
+
+// Size returns the total number of bytes to be downloaded. It is zero when no
+// size was given to NewDownload and none has been learned from the server.
+func (c *Download) Size() uint64 {
+	return c.size
+}
+
+// Progress returns the number of bytes which have already been downloaded.
+//
+// The counter is read with an atomic load and that value is what gets
+// returned, matching the atomic adds made by Client.Do while it writes the
+// file.
+func (c *Download) Progress() uint64 {
+	return atomic.LoadUint64(&c.progress)
+}
+
+// ProgressRatio reports how much of the download is complete, as the number
+// of bytes downloaded so far divided by the total size. It returns 0 while
+// the total size is zero.
+func (c *Download) ProgressRatio() float64 {
+	total := c.size
+	if total != 0 {
+		done := atomic.LoadUint64(&c.progress)
+		return float64(done) / float64(total)
+	}
+
+	return 0
+}

+ 45 - 0
error.go

@@ -0,0 +1,45 @@
+package grab
+
+import (
+	"fmt"
+)
+
+// Codes stored in grabError.code to identify the kind of failure.
+const (
+	// errBadLength marks a content length which differs from the expected
+	// one; see IsContentLengthMismatch.
+	errBadLength = iota
+	// errNoFilename marks a download for which no file name could be
+	// determined; see IsNoFilename.
+	errNoFilename
+)
+
+// grabError is the error type created by errorf. It pairs a message with one
+// of the err* codes so that helpers such as IsContentLengthMismatch can
+// classify it.
+type grabError struct {
+	// err is the formatted message returned by Error.
+	err  string
+	// code is one of the err* constants.
+	code int
+}
+
+// Error returns the error message, implementing the error interface.
+func (c *grabError) Error() string {
+	return c.err
+}
+
+// errorf builds a *grabError carrying the given code and a message formatted
+// from format and a in the manner of fmt.Sprintf.
+func errorf(code int, format string, a ...interface{}) error {
+	msg := fmt.Sprintf(format, a...)
+	return &grabError{code: code, err: msg}
+}
+
+// IsContentLengthMismatch reports whether err is a *grabError carrying the
+// errBadLength code, that is, whether an HTTP response indicated that the
+// requested file does not have the expected length. It is false for any other
+// error, including nil.
+func IsContentLengthMismatch(err error) bool {
+	gerr, ok := err.(*grabError)
+	return ok && gerr.code == errBadLength
+}
+
+// IsNoFilename reports whether err is a *grabError carrying the errNoFilename
+// code, that is, whether no file name could be determined for a download. It
+// is false for any other error, including nil.
+func IsNoFilename(err error) bool {
+	gerr, ok := err.(*grabError)
+	return ok && gerr.code == errNoFilename
+}