package oss

import (
	"crypto/md5"
	"encoding/base64"
	"encoding/json"
	"errors"
	"hash"
	"hash/crc64"
	"io"
	"io/ioutil"
	"os"
	"strconv"
)

// DownloadFile downloads a file with multipart download.
//
// objectKey    the object key.
// filePath     the local file path to download the object to.
// partSize     the part size in bytes.
// options      the object's constraints; see GetObject for the reference.
//
// error    it's nil when the call succeeds, otherwise it's an error object.
//
func (bucket Bucket) DownloadFile(objectKey, filePath string, partSize int64, options ...Option) error {
	if partSize < 1 {
		return errors.New("oss: part size smaller than 1")
	}

	cpConf, err := getCpConfig(options, filePath)
	if err != nil {
		return err
	}

	uRange, err := getRangeConfig(options)
	if err != nil {
		return err
	}

	routines := getRoutines(options)

	if cpConf.IsEnable {
		return bucket.downloadFileWithCp(objectKey, filePath, partSize, options, cpConf.FilePath, routines, uRange)
	}

	return bucket.downloadFile(objectKey, filePath, partSize, options, routines, uRange)
}

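// A minimal usage sketch, assuming a client created with oss.New and a
// bucket obtained from client.Bucket as elsewhere in this SDK; the endpoint,
// credentials, and names below are placeholders:
//
//	client, err := oss.New("your-endpoint", "your-access-key-id", "your-access-key-secret")
//	if err != nil {
//		// handle error
//	}
//	bucket, err := client.Bucket("your-bucket")
//	if err != nil {
//		// handle error
//	}
//	// Download "my-object" in 1 MiB parts with 3 concurrent workers.
//	err = bucket.DownloadFile("my-object", "/tmp/my-object", 1024*1024, oss.Routines(3))
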
// getRangeConfig gets the download range from the options.
func getRangeConfig(options []Option) (*unpackedRange, error) {
	rangeOpt, err := findOption(options, HTTPHeaderRange, nil)
	if err != nil || rangeOpt == nil {
		return nil, err
	}
	return parseRange(rangeOpt.(string))
}

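// For example, a caller who only needs the first mebibyte of the object can
// pass a Range option (the same helper downloadWorker uses below), which sets
// the HTTP header "Range: bytes=0-1048575" that this function then unpacks:
//
//	err := bucket.DownloadFile("my-object", "/tmp/head", 256*1024, oss.Range(0, 1048575))
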
// ----- concurrent download without checkpoint -----

// downloadWorkerArg is the download worker's parameters.
type downloadWorkerArg struct {
	bucket    *Bucket
	key       string
	filePath  string
	options   []Option
	hook      downloadPartHook
	enableCRC bool
}

// downloadPartHook is the hook for tests.
type downloadPartHook func(part downloadPart) error

var downloadPartHooker downloadPartHook = defaultDownloadPartHook

func defaultDownloadPartHook(part downloadPart) error {
	return nil
}

// defaultDownloadProgressListener defines the default ProgressListener; it shields any ProgressListener passed in GetObject's options.
type defaultDownloadProgressListener struct {
}

// ProgressChanged is a no-op.
func (listener *defaultDownloadProgressListener) ProgressChanged(event *ProgressEvent) {
}

// downloadWorker receives parts from the jobs channel, downloads each part's
// byte range and writes it to the target file at the matching offset.
func downloadWorker(id int, arg downloadWorkerArg, jobs <-chan downloadPart, results chan<- downloadPart, failed chan<- error, die <-chan bool) {
	for part := range jobs {
		if err := arg.hook(part); err != nil {
			failed <- err
			break
		}

		// Resolve options
		r := Range(part.Start, part.End)
		p := Progress(&defaultDownloadProgressListener{})
		opts := make([]Option, 0, len(arg.options)+2)
		// Append in order; the range and progress options must come last so they take effect.
		opts = append(opts, arg.options...)
		opts = append(opts, r, p)

		rd, err := arg.bucket.GetObject(arg.key, opts...)
		if err != nil {
			failed <- err
			break
		}

		var crcCalc hash.Hash64
		if arg.enableCRC {
			crcCalc = crc64.New(crcTable())
			contentLen := part.End - part.Start + 1
			rd = ioutil.NopCloser(TeeReader(rd, crcCalc, contentLen, nil, nil))
		}

		select {
		case <-die:
			rd.Close()
			return
		default:
		}

		fd, err := os.OpenFile(arg.filePath, os.O_WRONLY, FilePermMode)
		if err != nil {
			rd.Close()
			failed <- err
			break
		}

		_, err = fd.Seek(part.Start-part.Offset, io.SeekStart)
		if err != nil {
			fd.Close()
			rd.Close()
			failed <- err
			break
		}

		_, err = io.Copy(fd, rd)
		rd.Close()
		if err != nil {
			fd.Close()
			failed <- err
			break
		}
		if arg.enableCRC {
			part.CRC64 = crcCalc.Sum64()
		}

		fd.Close()
		results <- part
	}
}

// downloadScheduler feeds all parts to the jobs channel, then closes it.
func downloadScheduler(jobs chan downloadPart, parts []downloadPart) {
	for _, part := range parts {
		jobs <- part
	}
	close(jobs)
}

// downloadPart defines a download part.
type downloadPart struct {
	Index  int    // Part number, starting from 0
	Start  int64  // Start index (inclusive, in bytes)
	End    int64  // End index (inclusive, in bytes)
	Offset int64  // Start offset of the requested download range
	CRC64  uint64 // CRC check value of the part
}

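// As a concrete illustration (not from the source, but implied by the
// splitting logic in getDownloadParts below): a 10 MiB object downloaded with
// partSize = 4 MiB and no range yields three parts with inclusive byte
// indices
//
//	{Index: 0, Start: 0,       End: 4194303,  Offset: 0}
//	{Index: 1, Start: 4194304, End: 8388607,  Offset: 0}
//	{Index: 2, Start: 8388608, End: 10485759, Offset: 0}
//
// Each worker seeks to Start-Offset in the local file, so parts can be
// written concurrently without coordination.
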
// getDownloadParts splits the object into parts for download.
func getDownloadParts(bucket *Bucket, objectKey string, partSize int64, uRange *unpackedRange) ([]downloadPart, bool, uint64, error) {
	meta, err := bucket.GetObjectDetailedMeta(objectKey)
	if err != nil {
		return nil, false, 0, err
	}

	parts := []downloadPart{}
	objectSize, err := strconv.ParseInt(meta.Get(HTTPHeaderContentLength), 10, 64)
	if err != nil {
		return nil, false, 0, err
	}

	enableCRC := false
	crcVal := (uint64)(0)
	if bucket.getConfig().IsEnableCRC && meta.Get(HTTPHeaderOssCRC64) != "" {
		// CRC verification only works for whole-object downloads; a partial
		// range cannot be checked against the object's CRC64.
		if uRange == nil || (!uRange.hasStart && !uRange.hasEnd) {
			enableCRC = true
			crcVal, _ = strconv.ParseUint(meta.Get(HTTPHeaderOssCRC64), 10, 64)
		}
	}

	part := downloadPart{}
	i := 0
	start, end := adjustRange(uRange, objectSize)
	for offset := start; offset < end; offset += partSize {
		part.Index = i
		part.Start = offset
		part.End = GetPartEnd(offset, end, partSize)
		part.Offset = start
		part.CRC64 = 0
		parts = append(parts, part)
		i++
	}
	return parts, enableCRC, crcVal, nil
}

// getObjectBytes returns the total number of bytes covered by the parts.
func getObjectBytes(parts []downloadPart) int64 {
	var ob int64
	for _, part := range parts {
		ob += (part.End - part.Start + 1)
	}
	return ob
}

// combineCRCInParts calculates the total CRC of consecutive parts.
func combineCRCInParts(dps []downloadPart) uint64 {
	if len(dps) == 0 {
		return 0
	}

	crc := dps[0].CRC64
	for i := 1; i < len(dps); i++ {
		crc = CRC64Combine(crc, dps[i].CRC64, (uint64)(dps[i].End-dps[i].Start+1))
	}

	return crc
}

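// The fold above relies on the standard CRC combination identity: for two
// adjacent byte ranges A and B, the CRC of the concatenation A||B can be
// computed from CRC(A), CRC(B), and len(B) alone, without re-reading the
// data. Schematically:
//
//	crcAB := CRC64Combine(crcA, crcB, uint64(lenB)) // CRC64 of A followed by B
//
// This is why each worker can hash only its own part and the results can
// still be checked against the whole object's CRC64 header.
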
// downloadFile downloads the file concurrently without checkpoint.
func (bucket Bucket) downloadFile(objectKey, filePath string, partSize int64, options []Option, routines int, uRange *unpackedRange) error {
	tempFilePath := filePath + TempFileSuffix
	listener := getProgressListener(options)

	// If the file does not exist, create one. If it exists, the download will overwrite it.
	fd, err := os.OpenFile(tempFilePath, os.O_WRONLY|os.O_CREATE, FilePermMode)
	if err != nil {
		return err
	}
	fd.Close()

	// Get the parts of the file
	parts, enableCRC, expectedCRC, err := getDownloadParts(&bucket, objectKey, partSize, uRange)
	if err != nil {
		return err
	}

	jobs := make(chan downloadPart, len(parts))
	results := make(chan downloadPart, len(parts))
	failed := make(chan error)
	die := make(chan bool)

	var completedBytes int64
	totalBytes := getObjectBytes(parts)
	event := newProgressEvent(TransferStartedEvent, 0, totalBytes)
	publishProgress(listener, event)

	// Start the download workers
	arg := downloadWorkerArg{&bucket, objectKey, tempFilePath, options, downloadPartHooker, enableCRC}
	for w := 1; w <= routines; w++ {
		go downloadWorker(w, arg, jobs, results, failed, die)
	}

	// Schedule the parts for concurrent download
	go downloadScheduler(jobs, parts)

	// Wait for the part downloads to finish
	completed := 0
	for completed < len(parts) {
		select {
		case part := <-results:
			completed++
			completedBytes += (part.End - part.Start + 1)
			parts[part.Index].CRC64 = part.CRC64
			event = newProgressEvent(TransferDataEvent, completedBytes, totalBytes)
			publishProgress(listener, event)
		case err = <-failed:
			close(die)
			event = newProgressEvent(TransferFailedEvent, completedBytes, totalBytes)
			publishProgress(listener, event)
			return err
		}
	}

	event = newProgressEvent(TransferCompletedEvent, completedBytes, totalBytes)
	publishProgress(listener, event)

	if enableCRC {
		actualCRC := combineCRCInParts(parts)
		err = checkDownloadCRC(actualCRC, expectedCRC)
		if err != nil {
			return err
		}
	}

	return os.Rename(tempFilePath, filePath)
}

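// A hedged sketch of plugging in progress reporting: the Progress option
// (used by downloadWorker above) accepts any ProgressListener implementation.
// The logListener type below is hypothetical, not part of this file, and the
// ProgressEvent field names are assumed from this SDK's progress types:
//
//	type logListener struct{}
//
//	func (l *logListener) ProgressChanged(event *oss.ProgressEvent) {
//		fmt.Printf("transferred %d/%d bytes\n", event.ConsumedBytes, event.TotalBytes)
//	}
//
//	err := bucket.DownloadFile("my-object", "/tmp/my-object", 1024*1024,
//		oss.Routines(2), oss.Progress(&logListener{}))
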
// ----- concurrent download with checkpoint -----

const downloadCpMagic = "92611BED-89E2-46B6-89E5-72F273D4B0A3"

type downloadCheckpoint struct {
	Magic     string         // Magic
	MD5       string         // Checkpoint content MD5
	FilePath  string         // Local file
	Object    string         // Key
	ObjStat   objectStat     // Object status
	Parts     []downloadPart // All download parts
	PartStat  []bool         // Parts' download status
	Start     int64          // Start point of the file
	End       int64          // End point of the file
	enableCRC bool           // Whether CRC checking is enabled
	CRC       uint64         // CRC check value
}

type objectStat struct {
	Size         int64  // Object size
	LastModified string // Last modified time
	Etag         string // Etag
}

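// An illustrative (hypothetical) serialized checkpoint, assuming the JSON
// encoding produced by dump below; all values are placeholders. Note that the
// unexported enableCRC field is not serialized by encoding/json:
//
//	{
//	  "Magic": "92611BED-89E2-46B6-89E5-72F273D4B0A3",
//	  "MD5": "<base64 MD5 of the JSON with MD5 cleared>",
//	  "FilePath": "/tmp/my-object",
//	  "Object": "my-object",
//	  "ObjStat": {"Size": 10485760, "LastModified": "...", "Etag": "..."},
//	  "Parts": [{"Index": 0, "Start": 0, "End": 4194303, "Offset": 0, "CRC64": 0}],
//	  "PartStat": [false],
//	  "Start": 0,
//	  "End": 0,
//	  "CRC": 1234567890
//	}
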
// isValid checks whether the checkpoint data is valid. It returns true when
// the data is intact, the checksum matches, and the object has not been updated.
func (cp downloadCheckpoint) isValid(bucket *Bucket, objectKey string, uRange *unpackedRange) (bool, error) {
	// Compare the checkpoint's magic and MD5
	cpb := cp
	cpb.MD5 = ""
	js, _ := json.Marshal(cpb)
	sum := md5.Sum(js)
	b64 := base64.StdEncoding.EncodeToString(sum[:])

	if cp.Magic != downloadCpMagic || b64 != cp.MD5 {
		return false, nil
	}

	// Ensure the object has not been updated.
	meta, err := bucket.GetObjectDetailedMeta(objectKey)
	if err != nil {
		return false, err
	}

	objectSize, err := strconv.ParseInt(meta.Get(HTTPHeaderContentLength), 10, 64)
	if err != nil {
		return false, err
	}

	// Compare the object size, last modified time and etag
	if cp.ObjStat.Size != objectSize ||
		cp.ObjStat.LastModified != meta.Get(HTTPHeaderLastModified) ||
		cp.ObjStat.Etag != meta.Get(HTTPHeaderEtag) {
		return false, nil
	}

	// Check the download range
	if uRange != nil {
		start, end := adjustRange(uRange, objectSize)
		if start != cp.Start || end != cp.End {
			return false, nil
		}
	}

	return true, nil
}

// load loads the checkpoint from the local file.
func (cp *downloadCheckpoint) load(filePath string) error {
	contents, err := ioutil.ReadFile(filePath)
	if err != nil {
		return err
	}

	err = json.Unmarshal(contents, cp)
	return err
}

// dump serializes the checkpoint, with its MD5, to the local file.
func (cp *downloadCheckpoint) dump(filePath string) error {
	bcp := *cp

	// Calculate the MD5 over the checkpoint with the MD5 field cleared
	bcp.MD5 = ""
	js, err := json.Marshal(bcp)
	if err != nil {
		return err
	}
	sum := md5.Sum(js)
	b64 := base64.StdEncoding.EncodeToString(sum[:])
	bcp.MD5 = b64

	// Serialize
	js, err = json.Marshal(bcp)
	if err != nil {
		return err
	}

	// Dump
	return ioutil.WriteFile(filePath, js, FilePermMode)
}

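// dump and isValid above form a simple integrity round trip: dump hashes the
// checkpoint with MD5 cleared and stores the digest; isValid repeats the same
// computation and compares. A minimal sketch of the invariant (mustMarshal is
// a hypothetical helper, not part of this file):
//
//	cp.MD5 = ""                      // cleared before hashing on both sides
//	sum := md5.Sum(mustMarshal(cp))
//	stored := base64.StdEncoding.EncodeToString(sum[:])
//	// isValid accepts the checkpoint iff stored equals the MD5 recorded by dump
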
// todoParts returns the parts that have not finished downloading.
func (cp downloadCheckpoint) todoParts() []downloadPart {
	dps := []downloadPart{}
	for i, ps := range cp.PartStat {
		if !ps {
			dps = append(dps, cp.Parts[i])
		}
	}
	return dps
}

// getCompletedBytes returns the number of bytes already downloaded.
func (cp downloadCheckpoint) getCompletedBytes() int64 {
	var completedBytes int64
	for i, part := range cp.Parts {
		if cp.PartStat[i] {
			completedBytes += (part.End - part.Start + 1)
		}
	}
	return completedBytes
}

// prepare initializes the download tasks.
func (cp *downloadCheckpoint) prepare(bucket *Bucket, objectKey, filePath string, partSize int64, uRange *unpackedRange) error {
	// Checkpoint
	cp.Magic = downloadCpMagic
	cp.FilePath = filePath
	cp.Object = objectKey

	// Object
	meta, err := bucket.GetObjectDetailedMeta(objectKey)
	if err != nil {
		return err
	}

	objectSize, err := strconv.ParseInt(meta.Get(HTTPHeaderContentLength), 10, 64)
	if err != nil {
		return err
	}

	cp.ObjStat.Size = objectSize
	cp.ObjStat.LastModified = meta.Get(HTTPHeaderLastModified)
	cp.ObjStat.Etag = meta.Get(HTTPHeaderEtag)

	// Parts
	cp.Parts, cp.enableCRC, cp.CRC, err = getDownloadParts(bucket, objectKey, partSize, uRange)
	if err != nil {
		return err
	}
	// make zeroes the slice, so every part starts as not-downloaded.
	cp.PartStat = make([]bool, len(cp.Parts))

	return nil
}

// complete removes the checkpoint file and moves the downloaded temp file into place.
func (cp *downloadCheckpoint) complete(cpFilePath, downFilepath string) error {
	os.Remove(cpFilePath)
	return os.Rename(downFilepath, cp.FilePath)
}

// downloadFileWithCp downloads the file with checkpoint.
func (bucket Bucket) downloadFileWithCp(objectKey, filePath string, partSize int64, options []Option, cpFilePath string, routines int, uRange *unpackedRange) error {
	tempFilePath := filePath + TempFileSuffix
	listener := getProgressListener(options)

	// Load the checkpoint data.
	dcp := downloadCheckpoint{}
	err := dcp.load(cpFilePath)
	if err != nil {
		os.Remove(cpFilePath)
	}

	// On load error or invalid data, re-initialize the download.
	valid, err := dcp.isValid(&bucket, objectKey, uRange)
	if err != nil || !valid {
		if err = dcp.prepare(&bucket, objectKey, filePath, partSize, uRange); err != nil {
			return err
		}
		os.Remove(cpFilePath)
	}

	// Create the file if it does not exist. Otherwise the part downloads will overwrite it.
	fd, err := os.OpenFile(tempFilePath, os.O_WRONLY|os.O_CREATE, FilePermMode)
	if err != nil {
		return err
	}
	fd.Close()

	// Unfinished parts
	parts := dcp.todoParts()
	jobs := make(chan downloadPart, len(parts))
	results := make(chan downloadPart, len(parts))
	failed := make(chan error)
	die := make(chan bool)

	completedBytes := dcp.getCompletedBytes()
	event := newProgressEvent(TransferStartedEvent, completedBytes, dcp.ObjStat.Size)
	publishProgress(listener, event)

	// Start the download worker goroutines
	arg := downloadWorkerArg{&bucket, objectKey, tempFilePath, options, downloadPartHooker, dcp.enableCRC}
	for w := 1; w <= routines; w++ {
		go downloadWorker(w, arg, jobs, results, failed, die)
	}

	// Schedule the parts for concurrent download
	go downloadScheduler(jobs, parts)

	// Wait for the part downloads to finish
	completed := 0
	for completed < len(parts) {
		select {
		case part := <-results:
			completed++
			dcp.PartStat[part.Index] = true
			dcp.Parts[part.Index].CRC64 = part.CRC64
			dcp.dump(cpFilePath)
			completedBytes += (part.End - part.Start + 1)
			event = newProgressEvent(TransferDataEvent, completedBytes, dcp.ObjStat.Size)
			publishProgress(listener, event)
		case err = <-failed:
			close(die)
			event = newProgressEvent(TransferFailedEvent, completedBytes, dcp.ObjStat.Size)
			publishProgress(listener, event)
			return err
		}
	}

	event = newProgressEvent(TransferCompletedEvent, completedBytes, dcp.ObjStat.Size)
	publishProgress(listener, event)

	if dcp.enableCRC {
		actualCRC := combineCRCInParts(dcp.Parts)
		err = checkDownloadCRC(actualCRC, dcp.CRC)
		if err != nil {
			return err
		}
	}

	return dcp.complete(cpFilePath, tempFilePath)
}

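// A minimal usage sketch for the checkpoint path, assuming the Checkpoint
// option defined elsewhere in this SDK (treat the exact signature as an
// assumption). Re-running an interrupted download with the same checkpoint
// file resumes from the unfinished parts:
//
//	err := bucket.DownloadFile("my-object", "/tmp/my-object", 1024*1024,
//		oss.Routines(3), oss.Checkpoint(true, "/tmp/my-object.cp"))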