| Index: cmd/article2md/present/args.go |
| =================================================================== |
| new file mode 100644 |
| --- /dev/null |
| +++ b/cmd/article2md/present/args.go |
| @@ -0,0 +1,229 @@ |
| +// Copyright 2012 The Go Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style |
| +// license that can be found in the LICENSE file. |
| + |
| +package present |
| + |
| +import ( |
| + "errors" |
| + "regexp" |
| + "strconv" |
| + "unicode/utf8" |
| +) |
| + |
| +// This file is stolen from go/src/cmd/godoc/codewalk.go. |
| +// It's an evaluator for the file address syntax implemented by acme and sam, |
| +// but using Go-native regular expressions. |
| +// To keep things reasonably close, this version uses (?m:re) for all user-provided |
| +// regular expressions. That is the only change to the code from codewalk.go. |
| +// See http://plan9.bell-labs.com/sys/doc/sam/sam.html Table II |
| +// for details on the syntax. |
| + |
| +// addrToByte evaluates the given address starting at offset start in data. |
| +// It returns the lo and hi byte offset of the matched region within data. |
| +func addrToByteRange(addr string, start int, data []byte) (lo, hi int, err error) { |
| + if addr == "" { |
| + lo, hi = start, len(data) |
| + return |
| + } |
| + var ( |
| + dir byte |
| + prevc byte |
| + charOffset bool |
| + ) |
| + lo = start |
| + hi = start |
| + for addr != "" && err == nil { |
| + c := addr[0] |
| + switch c { |
| + default: |
| + err = errors.New("invalid address syntax near " + string(c)) |
| + case ',': |
| + if len(addr) == 1 { |
| + hi = len(data) |
| + } else { |
| + _, hi, err = addrToByteRange(addr[1:], hi, data) |
| + } |
| + return |
| + |
| + case '+', '-': |
| + if prevc == '+' || prevc == '-' { |
| + lo, hi, err = addrNumber(data, lo, hi, prevc, 1, charOffset) |
| + } |
| + dir = c |
| + |
| + case '$': |
| + lo = len(data) |
| + hi = len(data) |
| + if len(addr) > 1 { |
| + dir = '+' |
| + } |
| + |
| + case '#': |
| + charOffset = true |
| + |
| + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': |
| + var i int |
| + for i = 1; i < len(addr); i++ { |
| + if addr[i] < '0' || addr[i] > '9' { |
| + break |
| + } |
| + } |
| + var n int |
| + n, err = strconv.Atoi(addr[0:i]) |
| + if err != nil { |
| + break |
| + } |
| + lo, hi, err = addrNumber(data, lo, hi, dir, n, charOffset) |
| + dir = 0 |
| + charOffset = false |
| + prevc = c |
| + addr = addr[i:] |
| + continue |
| + |
| + case '/': |
| + var i, j int |
| + Regexp: |
| + for i = 1; i < len(addr); i++ { |
| + switch addr[i] { |
| + case '\\': |
| + i++ |
| + case '/': |
| + j = i + 1 |
| + break Regexp |
| + } |
| + } |
| + if j == 0 { |
| + j = i |
| + } |
| + pattern := addr[1:i] |
| + lo, hi, err = addrRegexp(data, lo, hi, dir, pattern) |
| + prevc = c |
| + addr = addr[j:] |
| + continue |
| + } |
| + prevc = c |
| + addr = addr[1:] |
| + } |
| + |
| + if err == nil && dir != 0 { |
| + lo, hi, err = addrNumber(data, lo, hi, dir, 1, charOffset) |
| + } |
| + if err != nil { |
| + return 0, 0, err |
| + } |
| + return lo, hi, nil |
| +} |
| + |
// addrNumber applies the given dir, n, and charOffset to the address lo, hi.
// dir is '+' or '-' (or 0, which is treated as '+' starting from offset 0),
// n is the count, and charOffset is true if the syntax used was #n.
// Applying +n (or +#n) means to advance n lines (or characters) after hi.
// Applying -n (or -#n) means to back up n lines (or characters) before lo.
// The return value is the new lo, hi. If the count cannot be satisfied
// within data, it returns the error "address out of range".
func addrNumber(data []byte, lo, hi int, dir byte, n int, charOffset bool) (int, int, error) {
	switch dir {
	case 0:
		// No direction: the count is absolute, measured from the start.
		lo = 0
		hi = 0
		fallthrough

	case '+':
		if charOffset {
			// Step forward one rune at a time.
			pos := hi
			for ; n > 0 && pos < len(data); n-- {
				_, size := utf8.DecodeRune(data[pos:])
				pos += size
			}
			if n == 0 {
				return pos, pos, nil
			}
			break
		}
		// find next beginning of line
		if hi > 0 {
			for hi < len(data) && data[hi-1] != '\n' {
				hi++
			}
		}
		lo = hi
		if n == 0 {
			return lo, hi, nil
		}
		for ; hi < len(data); hi++ {
			if data[hi] != '\n' {
				continue
			}
			switch n--; n {
			case 1:
				// The next line is the one being selected.
				lo = hi + 1
			case 0:
				return lo, hi + 1, nil
			}
		}

	case '-':
		if charOffset {
			// Step backward one rune at a time. Decoding the rune that
			// ends at pos never reads data[pos] itself, so starting at
			// len(data) is safe, and the result always lands on a rune
			// boundary rather than inside a multi-byte sequence.
			pos := lo
			for ; n > 0 && pos > 0; n-- {
				_, size := utf8.DecodeLastRune(data[:pos])
				pos -= size
			}
			if n == 0 {
				return pos, pos, nil
			}
			break
		}
		// find earlier beginning of line
		for lo > 0 && data[lo-1] != '\n' {
			lo--
		}
		hi = lo
		if n == 0 {
			return lo, hi, nil
		}
		for ; lo >= 0; lo-- {
			if lo > 0 && data[lo-1] != '\n' {
				continue
			}
			switch n--; n {
			case 1:
				// This line start is the end of the selected line.
				hi = lo
			case 0:
				return lo, hi, nil
			}
		}
	}

	return 0, 0, errors.New("address out of range")
}
| + |
// addrRegexp looks for the first match of pattern in data at or after hi.
// If nothing matches there and hi is past the start, the search wraps
// around and is retried over the whole of data.
// The pattern is compiled in multi-line mode so that ^ and $ anchor at
// line boundaries, as they do in sam and acme.
// A dir of '-' requests a backward search from lo, which is unimplemented
// and reported as an error.
// The return value is the byte range of the match within data.
func addrRegexp(data []byte, lo, hi int, dir byte, pattern string) (int, int, error) {
	re, err := regexp.Compile("(?m:" + pattern + ")")
	switch {
	case err != nil:
		return 0, 0, err
	case dir == '-':
		// A reverse search could be done by binary search through
		// the file, but that seems like overkill.
		return 0, 0, errors.New("reverse search not implemented")
	}

	// Search forward from hi; matches are relative to the subslice.
	if loc := re.FindIndex(data[hi:]); loc != nil {
		return loc[0] + hi, loc[1] + hi, nil
	}
	// No match. Wrap to beginning of data.
	if hi > 0 {
		if loc := re.FindIndex(data); loc != nil {
			return loc[0], loc[1], nil
		}
	}
	return 0, 0, errors.New("no match for " + pattern)
}