Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
ef59a25
Add closest
udan-jayanith Aug 16, 2025
8c107e4
Add QuerySelector and it need to be tested furthermore.
udan-jayanith Aug 16, 2025
dfd0e60
pop function bug fixed
udan-jayanith Aug 16, 2025
a3d00d4
Bug fixed and add QuerySearch. QuerySearch still have to be tested btw
udan-jayanith Aug 16, 2025
3a4713a
tokanizer CurrentNode to GetCurrentNode to match with the library schema
udan-jayanith Aug 16, 2025
771f70c
Fixed deprecated function use error in the parser.go
udan-jayanith Aug 16, 2025
ee66653
Add QuerySelectorAll and redocumented the library. QuerySearch, Query…
udan-jayanith Aug 16, 2025
39f9bbc
Updated the readme.md
udan-jayanith Aug 16, 2025
fa601d3
Fixed extra tabs in readme.md example go code section.
udan-jayanith Aug 16, 2025
447b637
Updated benchmark_test.go
udan-jayanith Aug 17, 2025
bdef2bb
Made GetParent fast. GetParent need be tested manually. GetParent and…
udan-jayanith Aug 17, 2025
423ceea
Bug fixed
udan-jayanith Aug 17, 2025
caaab5f
Removed QueryAll and Query methods
udan-jayanith Aug 18, 2025
1a79a08
Half finished selectors
udan-jayanith Aug 22, 2025
985b84d
Changed future changelog
udan-jayanith Aug 17, 2025
c16f5fc
Add new selectors. Need manual testing. Closest is temparorly comment…
udan-jayanith Aug 23, 2025
5b58d2e
Fixed selectos does not ignore html tag case
udan-jayanith Aug 23, 2025
3286bce
Fixed a bug in selectors and combinators tokenizer
udan-jayanith Aug 25, 2025
56bb8fb
Bug fixes
udan-jayanith Aug 28, 2025
48ebd58
Added Closest again.
udan-jayanith Aug 28, 2025
5b55acd
Refactored the Encode function
udan-jayanith Aug 28, 2025
f764a27
Bug fixses in test file
udan-jayanith Aug 29, 2025
599645e
Updated the documentation and add example codes
udan-jayanith Aug 29, 2025
62755a7
Documented the Tokenizer
udan-jayanith Aug 29, 2025
bc870b7
Updated the README.md
udan-jayanith Aug 29, 2025
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
{
"cSpell.words": [
"arraystack",
"autoplay",
"Combinators",
"DOCTYPEDTD",
"emirpasic",
"gohtml",
"Kottue",
"linkedliststack",
"println",
"yosssi"
]
}
3 changes: 1 addition & 2 deletions FUTURE-CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
## v0.0.3
- Closest

18 changes: 5 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,35 +26,27 @@ import (
- Querying

## Example

Here's an example of fetching a website, parsing it, and then using the querying methods.

```go
res, err := http.Get("https://www.metalsucks.net/")
if err != nil {
t.Fatal(err)
}
defer res.Body.Close()

//Parses the given html reader and then returns the root node and an error.
node, err := GoHtml.Decode(res.Body)
if err != nil {
t.Fatal(err)
}

nodeList := node.GetElementsByClassName("post-title")
iter := nodeList.IterNodeList()
for node := range iter{
print(node.GetInnerText())
nodeList := node.QuerySelectorAll(".left-content article .post-title")
for node := range nodeList.IterNodeList(){
println(node.GetInnerText())
}
```

## Changelog

Changes, bug fixes and new features in this version.
- add: Tokenizer
- add: NodeTreeBuilder
- renamed: QuerySelector to Query
- renamed: QuerySelectorAll to QueryAll

## Documentation

Fully fledged [documentation](https://pkg.go.dev/github.com/udan-jayanith/GoHTML) is available on [pkg.go.dev](https://pkg.go.dev/).
Expand Down
6 changes: 4 additions & 2 deletions benchmarks/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ import(
"net/http"
"time"
)

/*
Adapted from [GoQuery example](https://github.com/PuerkitoBio/goquery?tab=readme-ov-file#examples)
*/
func TestFetchPostCovers(t *testing.T){
res, err := http.Get("https://www.metalsucks.net/")
if err != nil {
Expand All @@ -20,7 +22,7 @@ func TestFetchPostCovers(t *testing.T){
t.Fatal(err)
}

nodeList := node.QueryAll(".sm-feat .clearfix article")
nodeList := node.QuerySelectorAll(".left-content article .post-title")
t.Log("Got ", nodeList.Len(), " post titles.")
iter := nodeList.IterNodeList()
for node := range iter{
Expand Down
2 changes: 1 addition & 1 deletion classList.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func (classList ClassList) Encode() string {
return classes
}

// EncodeTo encode className for the node.
// EncodeTo encodes classNames for the node.
// If node is nil EncodeTo does nothing.
func (classList ClassList) EncodeTo(node *Node){
if node == nil {
Expand Down
44 changes: 38 additions & 6 deletions classList_test.go
Original file line number Diff line number Diff line change
@@ -1,31 +1,63 @@
package GoHtml_test

import(
import (
"fmt"
"testing"
"github.com/udan-jayanith/GoHTML"

GoHtml "github.com/udan-jayanith/GoHTML"
)

func TestClasses(t *testing.T){
func TestClasses(t *testing.T) {
node := GoHtml.CreateNode("div")
node.SetAttribute("class", "div-container main")

classList := GoHtml.NewClassList()
classList.DecodeFrom(node)
if !classList.Contains("main"){
if !classList.Contains("main") {
t.Fatal("")
return
}
classList.DeleteClass("main")
if classList.Contains("main"){
if classList.Contains("main") {
t.Fatal("")
return
}

classList.AppendClass("main-div")
if !classList.Contains("main-div"){
if !classList.Contains("main-div") {
t.Fatal("")
return
}

classList.EncodeTo(node)
}

func ExampleClassList_Contains() {
	// Build a div carrying the classes "video-container" and "main-contents".
	div := GoHtml.CreateNode("div")
	div.SetAttribute("class", "video-container main-contents")

	// Load the div's classes into a fresh class list.
	classList := GoHtml.NewClassList()
	classList.DecodeFrom(div)

	// Report whether each of the following classes exists in the class list.
	for _, className := range []string{"container", "video-container"} {
		fmt.Println(classList.Contains(className))
	}

	//Output:
	//false
	//true
}

func ExampleClassList_Encode() {
	classList := GoHtml.NewClassList()

	// Register the classes one by one, in this order.
	for _, className := range []string{"container", "warper", "main-content"} {
		classList.AppendClass(className)
	}

	// Prints something like "warper container main-content".
	// The order of the classes in the output is not guaranteed.
	fmt.Println(classList.Encode())
}
2 changes: 1 addition & 1 deletion node-list.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"iter"
)

//NodeList can store nodes by appended order.
//NodeList can store nodes by appended order and can iterate over the node list by invoking IterNodeList method.
type NodeList struct {
list *list.List
currentEl *list.Element
Expand Down
18 changes: 18 additions & 0 deletions node-list_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package GoHtml_test

import (
"fmt"
"os"
"testing"

Expand All @@ -13,6 +14,7 @@ func TestIterNodeList1(t *testing.T) {
t.Fatal(err)
return
}
defer file.Close()

node, err := GoHtml.Decode(file)
if err != nil {
Expand All @@ -39,4 +41,20 @@ func TestIterNodeList2(t *testing.T){
for node := range iter{
t.Log(node)
}
}

func ExampleNodeList() {
	nodeList := GoHtml.NewNodeList()

	// Nodes are appended in this order and are printed back in the same order.
	for _, tagName := range []string{"br", "hr", "div"} {
		nodeList.Append(GoHtml.CreateNode(tagName))
	}

	for node := range nodeList.IterNodeList() {
		fmt.Println(node.GetTagName())
	}
	//Output:
	//br
	//hr
	//div
}
27 changes: 24 additions & 3 deletions node-tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package GoHtml

import (
"strings"

"golang.org/x/net/html"
)

Expand Down Expand Up @@ -38,7 +39,7 @@ func (node *Node) SetPreviousNode(previousNode *Node) {
node.previousNode = previousNode
}

// GetChildNode returns the first child elements of this node.
// GetChildNode returns the first child node of this node.
func (node *Node) GetChildNode() *Node {
return node.childNode
}
Expand Down Expand Up @@ -75,7 +76,7 @@ func (node *Node) GetAttribute(attributeName string) (string, bool) {
// RemoveAttribute deletes the specified attribute from the node.
// The attribute name is lower-cased and trimmed of surrounding whitespace before the lookup.
func (node *Node) RemoveAttribute(attributeName string) {
	key := strings.ToLower(attributeName)
	key = strings.TrimSpace(key)
	delete(node.attributes, key)
}

// IterateAttributes calls callback at every attribute in the node by passing attribute and value of the node.
Expand Down Expand Up @@ -114,19 +115,21 @@ func (node *Node) AppendChild(childNode *Node) {

lastNode := node.GetChildNode().GetLastNode()
childNode.SetPreviousNode(lastNode)
childNode.setParentNode(lastNode.GetParent())
lastNode.SetNextNode(childNode)
}

// Append inserts the newNode to end of the node chain.
// The new node is linked after the last node of the chain that node belongs to
// and is given the same parent as that last node.
func (node *Node) Append(newNode *Node) {
	// Find the current tail of the chain.
	lastNode := node.GetLastNode()
	// Point the new node back at the old tail.
	newNode.SetPreviousNode(lastNode)
	// The new node gets the same parent as the old tail.
	newNode.setParentNode(lastNode.GetParent())
	// Finally point the old tail forward at the new node.
	lastNode.SetNextNode(newNode)
}

// GetParent returns a pointer to the parent node.
func (node *Node) GetParent() *Node {
return node.GetFirstNode().getParentNode()
return node.parentNode
}

// GetLastNode returns the last node in the node branch.
Expand Down Expand Up @@ -203,3 +206,21 @@ func (node *Node) RemoveNode() {
func (node *Node) IsTextNode() bool {
	// A node whose tag name is empty is treated as a text node.
	tagName := node.GetTagName()
	return tagName == ""
}

// Closest walks from this node toward the root node and returns the first node that matches the selector.
// The node itself is checked first. While no match is found, the search steps to the previous node,
// or to the parent when there is no previous node. Closest returns nil if nothing matches.
//
// Adapted from the MDN Element: closest() method:
// https://developer.mozilla.org/en-US/docs/Web/API/Element/closest
func (node *Node) Closest(selector string) *Node {
	traverser := NewTraverser(node)
	selectors := TokenizeSelectorsAndCombinators(selector)

	for {
		current := traverser.GetCurrentNode()
		// Stop when the search has run out of nodes or the current node matches.
		if current == nil || matchFromRightMostSelectors(current, selectors) {
			return current
		}

		// No previous node left at this level: climb to the parent.
		if current.GetPreviousNode() == nil {
			traverser.SetCurrentNodeTo(current.GetParent())
			continue
		}
		traverser.Previous()
	}
}
20 changes: 20 additions & 0 deletions node-tree_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,4 +112,24 @@ func TestRemoveNode(t *testing.T){

//p.RemoveNode()
//t.Log(GoHtml.NodeTreeToHTML(article))
}

// TestClosest checks that Closest, started from the element with class
// "ordered-item", finds an <ol> element for the selector "img+.ordered-list".
func TestClosest(t *testing.T) {
	root, err := testFile4NodeTree()
	if err != nil {
		t.Fatal(err)
	}

	item := root.GetElementByClassName("ordered-item")
	if item == nil {
		t.Fatal("Node is nil.")
	}

	closest := item.Closest("img+.ordered-list")
	if closest == nil {
		t.Fatal("Node is nil")
	}
	if closest.GetTagName() != "ol" {
		t.Fatal("Unexpected element.")
	}
}
8 changes: 3 additions & 5 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,23 @@ package GoHtml
import (
"io"
"strings"

"golang.org/x/net/html"
)


// Decode reads from r and creates a node tree, then returns the root node and a nil error.
func Decode(r io.Reader) (*Node, error) {
t := NewTokenizer(r)
nodeTreeBuilder := NewNodeTreeBuilder()
for {
tt := t.Advanced()
if tt == html.ErrorToken{
if tt == html.ErrorToken {
break
}

nodeTreeBuilder.WriteNodeTree(t.CurrentNode(), tt)
nodeTreeBuilder.WriteNodeTree(t.GetCurrentNode(), tt)
}
return nodeTreeBuilder.GetRootNode(), nil

}

// HTMLToNodeTree returns the given HTML code as a node tree. If an error were to occur, it would be a SyntaxError.
Expand All @@ -30,4 +29,3 @@ func HTMLToNodeTree(html string) (*Node, error) {
return node, err
}


30 changes: 30 additions & 0 deletions parser_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package GoHtml_test

import (
"fmt"
"os"
"strings"
"testing"
Expand All @@ -14,6 +15,7 @@ func TestDecode(t *testing.T) {
t.Fatal(err)
return
}
defer file.Close()

node, err := GoHtml.Decode(file)
if err != nil {
Expand All @@ -24,3 +26,31 @@ func TestDecode(t *testing.T) {
var builder strings.Builder
GoHtml.Encode(&builder, node)
}

// ExampleDecode parses a small HTML document from a reader and prints the
// inner text of its <title> element.
func ExampleDecode() {
	r := strings.NewReader(`
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>User Profile</title>
</head>
<body>
<h1 class="username">Udan</h1>
<p class="email">udanjayanith@gmail.com</p>
<p>Joined: 01/08/2024</p>
</body>
</html>
`)

	// Decode returns the root node of the parsed tree; check the error instead of discarding it.
	rootNode, err := GoHtml.Decode(r)
	if err != nil {
		fmt.Println("decode failed:", err)
		return
	}

	// QuerySelector may return nil when nothing matches, so guard before reading the text.
	titleNode := rootNode.QuerySelector("title")
	title := ""
	if titleNode != nil {
		title = titleNode.GetInnerText()
	}
	fmt.Println(title)
	//Output: User Profile
}
Loading