Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion FUTURE-CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@
* GetElementsByClassName
* GetElementsByTagName
* GetElementsById
* QueryList
* QuerySelector
* QuerySelectorAll
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/udan-jayanith/GoHTML

go 1.24.1
go 1.25.0

require github.com/emirpasic/gods v1.18.1

Expand Down
7 changes: 4 additions & 3 deletions node-tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,14 @@ func (node *Node) SetTagName(tagName string) {

// GetAttribute returns the specified attribute value from the node. If the specified attribute doesn't exist, GetAttribute returns an empty string and false.
func (node *Node) GetAttribute(attributeName string) (string, bool) {
v, ok := node.attributes[attributeName]
v, ok := node.attributes[strings.TrimSpace(strings.ToLower(attributeName))]
return v, ok
}

// RemoveAttribute deletes the specified attribute from the node.
func (node *Node) RemoveAttribute(attributeName string) {
delete(node.attributes, attributeName)
delete(node.attributes, strings.TrimSpace(strings.ToLower(attributeName)))

}

// IterateAttributes calls callback at every attribute in the node by passing attribute and value of the node.
Expand All @@ -87,7 +88,7 @@ func (node *Node) IterateAttributes(callback func(attribute, value string)) {

// SetAttribute adds an attribute to the node.
func (node *Node) SetAttribute(attribute, value string) {
node.attributes[strings.TrimSpace(attribute)] = strings.TrimSpace(value)
node.attributes[strings.ToLower(strings.TrimSpace(attribute))] = strings.TrimSpace(value)
}

// GetText returns the text on the node. This does not return text from its child nodes. If you also want the child nodes' text, use the GetInnerText method on the node.
Expand Down
121 changes: 112 additions & 9 deletions querying.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ func (node *Node) GetElementByClassName(className string) *Node {
}

// GetElementByID returns the first node that match with the given idName by advancing from the node.
func (node *Node) GetElementByID(idName string) *Node{
func (node *Node) GetElementByID(idName string) *Node {
traverser := NewTraverser(node)
var returnNode *Node
traverser.Walkthrough(func(node *Node) TraverseCondition {
id, _ := node.GetAttribute("id")
if id == idName{
id, _ := node.GetAttribute("id")
if id == idName {
returnNode = node
return StopWalkthrough
}
Expand All @@ -54,15 +54,15 @@ func (node *Node) GetElementByID(idName string) *Node{
}

// GetElementsByClassName returns a NodeList containing nodes that have the given className from the node.
func (node *Node) GetElementsByClassName(className string) NodeList{
func (node *Node) GetElementsByClassName(className string) NodeList {
traverser := NewTraverser(node)
nodeList := NewNodeList()

traverser.Walkthrough(func(node *Node) TraverseCondition {
classList := NewClassList()
classList.DecodeFrom(node)

if classList.Contains(className){
if classList.Contains(className) {
nodeList.Append(node)
}
return ContinueWalkthrough
Expand All @@ -71,12 +71,12 @@ func (node *Node) GetElementsByClassName(className string) NodeList{
}

// GetElementsByTagName returns a NodeList containing nodes that have the given tagName from the node.
func (node *Node) GetElementsByTagName(tagName string) NodeList{
func (node *Node) GetElementsByTagName(tagName string) NodeList {
traverser := NewTraverser(node)
nodeList := NewNodeList()

traverser.Walkthrough(func(node *Node) TraverseCondition {
if node.GetTagName() == tagName{
if node.GetTagName() == tagName {
nodeList.Append(node)
}
return ContinueWalkthrough
Expand All @@ -85,16 +85,119 @@ func (node *Node) GetElementsByTagName(tagName string) NodeList{
}

// GetElementsById returns a NodeList containing nodes that have the given idName from the node.
func (node *Node) GetElementsById(idName string) NodeList{
func (node *Node) GetElementsById(idName string) NodeList {
traverser := NewTraverser(node)
nodeList := NewNodeList()

traverser.Walkthrough(func(node *Node) TraverseCondition {
id, _ := node.GetAttribute("id")
if id == idName{
if id == idName {
nodeList.Append(node)
}
return ContinueWalkthrough
})
return nodeList
}

// Selector types identify which kind of basic CSS selector a QueryToken holds.
const (
	// Id marks an id selector, written "#name" in a query.
	Id int = iota
	// Tag marks a tag-name selector, written as a bare name in a query.
	Tag
	// Class marks a class selector, written ".name" in a query.
	Class
)

// QueryToken stores data about one basic CSS selector (id, class, or tag).
type QueryToken struct {
	// Type is one of Id, Tag, or Class.
	Type int
	// SelectorName is the selector without its leading "." or "#" prefix.
	SelectorName string
	// Selector is the selector exactly as it appeared in the query.
	Selector string
}

// TokenizeQuery tokenizes the query and returns a list of QueryToken, one per
// whitespace-separated selector, in the order they appear in the query.
//
// A selector starting with "." becomes a Class token, one starting with "#"
// becomes an Id token, and anything else becomes a Tag token. A bare "." or
// "#" is skipped. An empty or all-whitespace query yields an empty, non-nil
// slice.
func TokenizeQuery(query string) []QueryToken {
	// strings.Fields splits on any run of whitespace (spaces, tabs, newlines)
	// and never yields empty fields, so multi-line or tab-separated queries
	// tokenize the same way as space-separated ones.
	fields := strings.Fields(query)
	tokens := make([]QueryToken, 0, len(fields))

	for _, sel := range fields {
		if sel == "." || sel == "#" {
			// A bare prefix names nothing, so there is nothing to match on.
			continue
		}

		// Default to a tag selector; the prefix byte, if any, overrides it.
		token := QueryToken{Type: Tag, SelectorName: sel, Selector: sel}
		switch sel[0] {
		case '.':
			token.Type, token.SelectorName = Class, sel[1:]
		case '#':
			token.Type, token.SelectorName = Id, sel[1:]
		}
		tokens = append(tokens, token)
	}

	return tokens
}

// matchQueryTokens reports whether node satisfies every token in queryTokens.
// An Id token is compared against the node's "id" attribute, a Tag token
// against its tag name, and a Class token against its decoded class list.
// An empty token list never matches.
func matchQueryTokens(node *Node, queryTokens []QueryToken) bool {
	if len(queryTokens) == 0 {
		return false
	}

	classes := NewClassList()
	classes.DecodeFrom(node)

	for i := range queryTokens {
		name := queryTokens[i].SelectorName

		// Tokens of an unrecognized type place no constraint on the node.
		satisfied := true
		switch queryTokens[i].Type {
		case Id:
			id, _ := node.GetAttribute("id")
			satisfied = id == name
		case Tag:
			satisfied = node.GetTagName() == name
		case Class:
			satisfied = classes.Contains(name)
		}

		if !satisfied {
			return false
		}
	}
	return true
}

// QuerySelector returns the first node, found by walking the tree from node,
// that matches the given query. A node matches only when it satisfies every
// selector in the query. QuerySelector returns nil when no node matches or
// when the query contains no selectors.
func (node *Node) QuerySelector(query string) *Node {
	queryTokens := TokenizeQuery(query)
	if len(queryTokens) == 0 {
		// matchQueryTokens rejects every node for an empty token list, so
		// walking the tree could never produce a result.
		return nil
	}

	traverser := NewTraverser(node)
	// Returning from the loop body stops the walkthrough at the first match.
	for current := range traverser.Walkthrough {
		if matchQueryTokens(current, queryTokens) {
			return current
		}
	}
	return nil
}

// QuerySelectorAll returns a NodeList containing the nodes, found by walking
// the tree from node, that match the given query. A node matches only when it
// satisfies every selector in the query. The list is empty when nothing
// matches or when the query contains no selectors.
func (node *Node) QuerySelectorAll(query string) NodeList {
	matches := NewNodeList()
	queryTokens := TokenizeQuery(query)
	if len(queryTokens) == 0 {
		// matchQueryTokens rejects every node for an empty token list, so
		// walking the tree could never add anything.
		return matches
	}

	traverser := NewTraverser(node)
	for current := range traverser.Walkthrough {
		if matchQueryTokens(current, queryTokens) {
			matches.Append(current)
		}
	}
	return matches
}
59 changes: 57 additions & 2 deletions querying_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func TestGetElementByClassName(t *testing.T) {
node = node.GetElementByClassName("ordered-item")
if node == nil {
t.Fatal("Node is nil")
}else if node.GetInnerText() != "Apple" {
} else if node.GetInnerText() != "Apple" {
t.Fatal("Expected Apple but got ", node.GetInnerText())
}
}
Expand All @@ -66,7 +66,7 @@ func TestGetElementByTagName(t *testing.T) {
node = node.GetElementByTagName("h2")
if node == nil {
t.Fatal("Node is nil")
}else if node.GetInnerText() != "List 1"{
} else if node.GetInnerText() != "List 1" {
t.Fatal("Expected List 1 but got ", node.GetInnerText())
}
}
Expand Down Expand Up @@ -137,3 +137,58 @@ func TestGetElementsById(t *testing.T) {
}
}
}

// TestSelectorTokenizer checks that joining the Selector field of every token
// returned by TokenizeQuery with single spaces reproduces the original query.
func TestSelectorTokenizer(t *testing.T) {
	queries := linkedliststack.New()
	for _, query := range []string{
		"article .content",
		"article p h1",
		"article p",
		".title #user",
		"#user title .title-1",
	} {
		queries.Push(query)
	}

	for !queries.Empty() {
		popped, _ := queries.Pop()
		want := popped.(string)

		// Rebuild the query from the tokens, separated by single spaces.
		got := ""
		for i, token := range GoHtml.TokenizeQuery(want) {
			if i > 0 {
				got += " "
			}
			got += token.Selector
		}

		if got != want {
			t.Fatal("Expected ", want, "but got", got)
		}
	}
}

// TestQuerySelector checks that QuerySelector finds the node matching
// "img #idElement" in the test file 4 node tree and that its src and alt
// attributes are empty.
func TestQuerySelector(t *testing.T) {
	node, err := testFile4NodeTree()
	if err != nil {
		t.Fatal(err)
	}

	imgEl := node.QuerySelector("img #idElement")
	if imgEl == nil {
		// Fail cleanly instead of panicking on the attribute lookups below.
		t.Fatal("Node is nil")
	}

	// NOTE(review): the test passes only when both attributes are empty;
	// confirm against the fixture that this is the intended expectation.
	imgSrc, _ := imgEl.GetAttribute("src")
	imgAlt, _ := imgEl.GetAttribute("alt")
	if imgSrc != "" || imgAlt != "" {
		t.Fatal("Expected empty src and alt but got ", imgSrc, " and ", imgAlt)
	}
}

// TestQuerySelectorAll checks that QuerySelectorAll("h2") returns exactly two
// nodes from the test file 4 node tree.
func TestQuerySelectorAll(t *testing.T) {
	node, err := testFile4NodeTree()
	if err != nil {
		t.Fatal(err)
	}

	nodeList := node.QuerySelectorAll("h2")
	if nodeList.Len() != 2 {
		t.Fatal("Expected 2 h2 nodes but got ", nodeList.Len())
	}
}
13 changes: 7 additions & 6 deletions traverser.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,16 @@ func (t *Traverser) Previous() *Node {
return t.GetCurrentNode()
}

type TraverseCondition bool
type TraverseCondition = bool

const (
StopWalkthrough TraverseCondition = true
ContinueWalkthrough TraverseCondition = false
StopWalkthrough TraverseCondition = false
ContinueWalkthrough TraverseCondition = true
)

// Walkthrough traverse the node tree from the current node to the end of the node tree by visiting every node. If callback returned StopWalkthrough walkthrough function will stop else if it returned ContinueWalkthrough it advanced to the next node.
// Walkthrough calls callback at every node and pass that node. Walkthrough traverse the node tree similar to DFS without visiting visited nodes iteratively.
// Walkthrough traverses the node tree from the current node to the end of the node tree, visiting every node.
// Walkthrough traverses the node tree iteratively, similar to DFS, without revisiting visited nodes.
// Walkthrough can be used as a range-over-func iterator or as a function that takes a callback and passes it every node one by one.
func (t *Traverser) Walkthrough(callback func(node *Node) TraverseCondition) {
stack := linkedliststack.New()
if t.GetCurrentNode() == nil {
Expand All @@ -60,7 +61,7 @@ func (t *Traverser) Walkthrough(callback func(node *Node) TraverseCondition) {

for stack.Size() > 0 {
currentNode, _ := stack.Pop()
if callback(currentNode.(*Node)) == StopWalkthrough {
if !callback(currentNode.(*Node)) {
return
}

Expand Down
2 changes: 1 addition & 1 deletion traverser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ func TestWalkthrough(t *testing.T) {
traverser := GoHtml.NewTraverser(body)

resList := make([]*GoHtml.Node, 0)
traverser.Walkthrough(func(node *GoHtml.Node) GoHtml.TraverseCondition {
traverser.Walkthrough(func(node *GoHtml.Node) bool {
resList = append(resList, node)
return GoHtml.ContinueWalkthrough
})
Expand Down