You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
260 lines
6.6 KiB
260 lines
6.6 KiB
// Copyright 2015 The Go Authors. All rights reserved. |
|
// Use of this source code is governed by a BSD-style |
|
// license that can be found in the LICENSE file. |
|
|
|
package rangetable |
|
|
|
import ( |
|
"unicode" |
|
) |
|
|
|
// atEnd is used to mark a completed iteration. |
|
const atEnd = unicode.MaxRune + 1 |
|
|
|
// Merge returns a new RangeTable that is the union of the given tables. |
|
// It can also be used to compact user-created RangeTables. The entries in |
|
// R16 and R32 for any given RangeTable should be sorted and non-overlapping. |
|
// |
|
// A lookup in the resulting table can be several times faster than using In |
|
// directly on the ranges. Merge is an expensive operation, however, and only |
|
// makes sense if one intends to use the result for more than a couple of |
|
// hundred lookups. |
|
func Merge(ranges ...*unicode.RangeTable) *unicode.RangeTable { |
|
rt := &unicode.RangeTable{} |
|
if len(ranges) == 0 { |
|
return rt |
|
} |
|
|
|
iter := tablesIter(make([]tableIndex, len(ranges))) |
|
|
|
for i, t := range ranges { |
|
iter[i] = tableIndex{t, 0, atEnd} |
|
if len(t.R16) > 0 { |
|
iter[i].next = rune(t.R16[0].Lo) |
|
} |
|
} |
|
|
|
if r0 := iter.next16(); r0.Stride != 0 { |
|
for { |
|
r1 := iter.next16() |
|
if r1.Stride == 0 { |
|
rt.R16 = append(rt.R16, r0) |
|
break |
|
} |
|
stride := r1.Lo - r0.Hi |
|
if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) { |
|
// Fully merge the next range into the previous one. |
|
r0.Hi, r0.Stride = r1.Hi, stride |
|
continue |
|
} else if stride == r0.Stride { |
|
// Move the first element of r1 to r0. This may eliminate an |
|
// entry. |
|
r0.Hi = r1.Lo |
|
r0.Stride = stride |
|
r1.Lo = r1.Lo + r1.Stride |
|
if r1.Lo > r1.Hi { |
|
continue |
|
} |
|
} |
|
rt.R16 = append(rt.R16, r0) |
|
r0 = r1 |
|
} |
|
} |
|
|
|
for i, t := range ranges { |
|
iter[i] = tableIndex{t, 0, atEnd} |
|
if len(t.R32) > 0 { |
|
iter[i].next = rune(t.R32[0].Lo) |
|
} |
|
} |
|
|
|
if r0 := iter.next32(); r0.Stride != 0 { |
|
for { |
|
r1 := iter.next32() |
|
if r1.Stride == 0 { |
|
rt.R32 = append(rt.R32, r0) |
|
break |
|
} |
|
stride := r1.Lo - r0.Hi |
|
if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) { |
|
// Fully merge the next range into the previous one. |
|
r0.Hi, r0.Stride = r1.Hi, stride |
|
continue |
|
} else if stride == r0.Stride { |
|
// Move the first element of r1 to r0. This may eliminate an |
|
// entry. |
|
r0.Hi = r1.Lo |
|
r1.Lo = r1.Lo + r1.Stride |
|
if r1.Lo > r1.Hi { |
|
continue |
|
} |
|
} |
|
rt.R32 = append(rt.R32, r0) |
|
r0 = r1 |
|
} |
|
} |
|
|
|
for i := 0; i < len(rt.R16) && rt.R16[i].Hi <= unicode.MaxLatin1; i++ { |
|
rt.LatinOffset = i + 1 |
|
} |
|
|
|
return rt |
|
} |
|
|
|
type tableIndex struct { |
|
t *unicode.RangeTable |
|
p uint32 |
|
next rune |
|
} |
|
|
|
type tablesIter []tableIndex |
|
|
|
// sortIter does an insertion sort using the next field of tableIndex. Insertion |
|
// sort is a good sorting algorithm for this case. |
|
func sortIter(t []tableIndex) { |
|
for i := range t { |
|
for j := i; j > 0 && t[j-1].next > t[j].next; j-- { |
|
t[j], t[j-1] = t[j-1], t[j] |
|
} |
|
} |
|
} |
|
|
|
// next16 finds the ranged to be added to the table. If ranges overlap between |
|
// multiple tables it clips the result to a non-overlapping range if the |
|
// elements are not fully subsumed. It returns a zero range if there are no more |
|
// ranges. |
|
func (ti tablesIter) next16() unicode.Range16 { |
|
sortIter(ti) |
|
|
|
t0 := ti[0] |
|
if t0.next == atEnd { |
|
return unicode.Range16{} |
|
} |
|
r0 := t0.t.R16[t0.p] |
|
r0.Lo = uint16(t0.next) |
|
|
|
// We restrict the Hi of the current range if it overlaps with another range. |
|
for i := range ti { |
|
tn := ti[i] |
|
// Since our tableIndices are sorted by next, we can break if the there |
|
// is no overlap. The first value of a next range can always be merged |
|
// into the current one, so we can break in case of equality as well. |
|
if rune(r0.Hi) <= tn.next { |
|
break |
|
} |
|
rn := tn.t.R16[tn.p] |
|
rn.Lo = uint16(tn.next) |
|
|
|
// Limit r0.Hi based on next ranges in list, but allow it to overlap |
|
// with ranges as long as it subsumes it. |
|
m := (rn.Lo - r0.Lo) % r0.Stride |
|
if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) { |
|
// Overlap, take the min of the two Hi values: for simplicity's sake |
|
// we only process one range at a time. |
|
if r0.Hi > rn.Hi { |
|
r0.Hi = rn.Hi |
|
} |
|
} else { |
|
// Not a compatible stride. Set to the last possible value before |
|
// rn.Lo, but ensure there is at least one value. |
|
if x := rn.Lo - m; r0.Lo <= x { |
|
r0.Hi = x |
|
} |
|
break |
|
} |
|
} |
|
|
|
// Update the next values for each table. |
|
for i := range ti { |
|
tn := &ti[i] |
|
if rune(r0.Hi) < tn.next { |
|
break |
|
} |
|
rn := tn.t.R16[tn.p] |
|
stride := rune(rn.Stride) |
|
tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride)) |
|
if rune(rn.Hi) < tn.next { |
|
if tn.p++; int(tn.p) == len(tn.t.R16) { |
|
tn.next = atEnd |
|
} else { |
|
tn.next = rune(tn.t.R16[tn.p].Lo) |
|
} |
|
} |
|
} |
|
|
|
if r0.Lo == r0.Hi { |
|
r0.Stride = 1 |
|
} |
|
|
|
return r0 |
|
} |
|
|
|
// next32 finds the ranged to be added to the table. If ranges overlap between |
|
// multiple tables it clips the result to a non-overlapping range if the |
|
// elements are not fully subsumed. It returns a zero range if there are no more |
|
// ranges. |
|
func (ti tablesIter) next32() unicode.Range32 { |
|
sortIter(ti) |
|
|
|
t0 := ti[0] |
|
if t0.next == atEnd { |
|
return unicode.Range32{} |
|
} |
|
r0 := t0.t.R32[t0.p] |
|
r0.Lo = uint32(t0.next) |
|
|
|
// We restrict the Hi of the current range if it overlaps with another range. |
|
for i := range ti { |
|
tn := ti[i] |
|
// Since our tableIndices are sorted by next, we can break if the there |
|
// is no overlap. The first value of a next range can always be merged |
|
// into the current one, so we can break in case of equality as well. |
|
if rune(r0.Hi) <= tn.next { |
|
break |
|
} |
|
rn := tn.t.R32[tn.p] |
|
rn.Lo = uint32(tn.next) |
|
|
|
// Limit r0.Hi based on next ranges in list, but allow it to overlap |
|
// with ranges as long as it subsumes it. |
|
m := (rn.Lo - r0.Lo) % r0.Stride |
|
if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) { |
|
// Overlap, take the min of the two Hi values: for simplicity's sake |
|
// we only process one range at a time. |
|
if r0.Hi > rn.Hi { |
|
r0.Hi = rn.Hi |
|
} |
|
} else { |
|
// Not a compatible stride. Set to the last possible value before |
|
// rn.Lo, but ensure there is at least one value. |
|
if x := rn.Lo - m; r0.Lo <= x { |
|
r0.Hi = x |
|
} |
|
break |
|
} |
|
} |
|
|
|
// Update the next values for each table. |
|
for i := range ti { |
|
tn := &ti[i] |
|
if rune(r0.Hi) < tn.next { |
|
break |
|
} |
|
rn := tn.t.R32[tn.p] |
|
stride := rune(rn.Stride) |
|
tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride)) |
|
if rune(rn.Hi) < tn.next { |
|
if tn.p++; int(tn.p) == len(tn.t.R32) { |
|
tn.next = atEnd |
|
} else { |
|
tn.next = rune(tn.t.R32[tn.p].Lo) |
|
} |
|
} |
|
} |
|
|
|
if r0.Lo == r0.Hi { |
|
r0.Stride = 1 |
|
} |
|
|
|
return r0 |
|
}
|
|
|