cmd/article2md/present/args.go - Issue 133960044: go.tools/cmd/article2md: add tool to convert article fi...

Unified Diff: cmd/article2md/present/args.go

Issue 133960044: go.tools/cmd/article2md: add tool to convert article fi...

Patch Set: diff -r 6fc790e5bfa623b690e2ca62c1f64370d86a874e https://code.google.com/p/go.tools Created 10 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: cmd/article2md/present/args.go

===================================================================

new file mode 100644

--- /dev/null

+++ b/cmd/article2md/present/args.go

@@ -0,0 +1,229 @@

+// Use of this source code is governed by a BSD-style

+// license that can be found in the LICENSE file.

+package present

+import (

+ "errors"

+ "regexp"

+ "strconv"

+ "unicode/utf8"

+// This file is stolen from go/src/cmd/godoc/codewalk.go.

+// It's an evaluator for the file address syntax implemented by acme and sam,

+// but using Go-native regular expressions.

+// To keep things reasonably close, this version uses (?m:re) for all user-provided

+// regular expressions. That is the only change to the code from codewalk.go.

+// See http://plan9.bell-labs.com/sys/doc/sam/sam.html Table II

+// for details on the syntax.

+// addrToByte evaluates the given address starting at offset start in data.

+// It returns the lo and hi byte offset of the matched region within data.

+func addrToByteRange(addr string, start int, data []byte) (lo, hi int, err error) {

+ if addr == "" {

+ lo, hi = start, len(data)

+ return

+ }

+ var (

+ dir byte

+ prevc byte

+ charOffset bool

+ )

+ lo = start

+ hi = start

+ for addr != "" && err == nil {

+ c := addr[0]

+ switch c {

+ default:

+ err = errors.New("invalid address syntax near " + string(c))

+ case ',':

+ if len(addr) == 1 {

+ hi = len(data)

+ } else {

+ _, hi, err = addrToByteRange(addr[1:], hi, data)

+ }

+ return

+ case '+', '-':

+ if prevc == '+' || prevc == '-' {

+ lo, hi, err = addrNumber(data, lo, hi, prevc, 1, charOffset)

+ }

+ dir = c

+ case '$':

+ lo = len(data)

+ hi = len(data)

+ if len(addr) > 1 {

+ dir = '+'

+ }

+ case '#':

+ charOffset = true

+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':

+ var i int

+ for i = 1; i < len(addr); i++ {

+ if addr[i] < '0' || addr[i] > '9' {

+ break

+ }

+ var n int

+ n, err = strconv.Atoi(addr[0:i])

+ if err != nil {

+ break

+ }

+ lo, hi, err = addrNumber(data, lo, hi, dir, n, charOffset)

+ dir = 0

+ charOffset = false

+ prevc = c

+ addr = addr[i:]

+ continue

+ case '/':

+ var i, j int

+ Regexp:

+ for i = 1; i < len(addr); i++ {

+ switch addr[i] {

+ case '\\':

+ i++

+ case '/':

+ j = i + 1

+ break Regexp

+ }

+ if j == 0 {

+ j = i

+ }

+ pattern := addr[1:i]

+ lo, hi, err = addrRegexp(data, lo, hi, dir, pattern)

+ prevc = c

+ addr = addr[j:]

+ continue

+ }

+ prevc = c

+ addr = addr[1:]

+ }

+ if err == nil && dir != 0 {

+ lo, hi, err = addrNumber(data, lo, hi, dir, 1, charOffset)

+ }

+ if err != nil {

+ return 0, 0, err

+ }

+ return lo, hi, nil

// addrNumber applies the given dir, n, and charOffset to the address lo, hi.
// dir is '+' or '-', n is the count, and charOffset is true if the syntax
// used was #n. Applying +n (or +#n) means to advance n lines
// (or characters) after hi. Applying -n (or -#n) means to back up n lines
// (or characters) before lo.
// A dir of 0 is treated as '+' applied from the beginning of data.
// The return value is the new lo, hi. If the requested position lies outside
// data, or dir is not one of 0, '+', '-', the error "address out of range"
// is returned.
func addrNumber(data []byte, lo, hi int, dir byte, n int, charOffset bool) (int, int, error) {
	switch dir {
	case 0:
		// No direction: count forward from the start of data.
		lo = 0
		hi = 0
		fallthrough

	case '+':
		if charOffset {
			// Step forward n runes from hi.
			pos := hi
			for ; n > 0 && pos < len(data); n-- {
				_, size := utf8.DecodeRune(data[pos:])
				pos += size
			}
			if n == 0 {
				return pos, pos, nil
			}
			break // ran off the end of data
		}
		// find next beginning of line
		if hi > 0 {
			for hi < len(data) && data[hi-1] != '\n' {
				hi++
			}
		}
		lo = hi
		if n == 0 {
			return lo, hi, nil
		}
		for ; hi < len(data); hi++ {
			if data[hi] != '\n' {
				continue
			}
			switch n--; n {
			case 1:
				// The next line is the one being addressed.
				lo = hi + 1
			case 0:
				return lo, hi + 1, nil
			}
		}

	case '-':
		if charOffset {
			// Step backward n runes from lo. Decoding the rune that ends at
			// pos keeps the result on a rune boundary and never reads
			// data[len(data)], which lo may equal (e.g. for "$-#1").
			pos := lo
			for ; n > 0 && pos > 0; n-- {
				_, size := utf8.DecodeLastRune(data[:pos])
				pos -= size
			}
			if n == 0 {
				return pos, pos, nil
			}
			break // ran off the beginning of data
		}
		// find earlier beginning of line
		for lo > 0 && data[lo-1] != '\n' {
			lo--
		}
		hi = lo
		if n == 0 {
			return lo, hi, nil
		}
		for ; lo >= 0; lo-- {
			if lo > 0 && data[lo-1] != '\n' {
				continue
			}
			// lo is at a beginning of line (or at the start of data).
			switch n--; n {
			case 1:
				hi = lo
			case 0:
				return lo, hi, nil
			}
		}
	}

	return 0, 0, errors.New("address out of range")
}

// addrRegexp searches for pattern in the given direction starting at lo, hi.
// The direction dir is '+' (search forward from hi) or '-' (search backward from lo).
// Backward searches are unimplemented.
//
// Any dir other than '-' searches forward. The pattern is compiled in
// multi-line mode. If nothing matches between hi and the end of data, the
// search wraps around and restarts from the beginning of data. The returned
// offsets are the start and end of the match within data. An error is
// returned when the pattern does not compile, when dir is '-', when hi lies
// outside data, or when there is no match.
func addrRegexp(data []byte, lo, hi int, dir byte, pattern string) (int, int, error) {
	// We want ^ and $ to work as in sam/acme, so use ?m.
	re, err := regexp.Compile("(?m:" + pattern + ")")
	if err != nil {
		return 0, 0, err
	}
	if dir == '-' {
		// Could implement reverse search using binary search
		// through file, but that seems like overkill.
		return 0, 0, errors.New("reverse search not implemented")
	}
	// Slicing data at an offset outside its bounds would panic; report it
	// as an ordinary address error instead.
	if hi < 0 || hi > len(data) {
		return 0, 0, errors.New("address out of range")
	}
	m := re.FindIndex(data[hi:])
	if len(m) > 0 {
		// Translate offsets relative to data[hi:] back into data.
		m[0] += hi
		m[1] += hi
	} else if hi > 0 {
		// No match. Wrap to beginning of data.
		m = re.FindIndex(data)
	}
	if len(m) == 0 {
		return 0, 0, errors.New("no match for " + pattern)
	}
	return m[0], m[1], nil
}

« no previous file with comments | « cmd/article2md/conv.go ('k') | cmd/article2md/present/caption.go » ('j') | no next file with comments »