Skip to content

Commit 54cc0c8

Browse files
committed
More code refactoring. Limit word splitting and disable globbing by default.
- Globally, now do word splitting (IFS) only on newline (which also makes "$*" expand with newline separator instead of space). - Disable globbing (pathname expansion), to be re-enabled locally using 'set +f' where needed (typically in a subshell). These changes help eliminate unexpected snags and security vulnerabilities in case someone forgets to quote a variable somewhere. They should also make the code "just work" with spaces and other special characters in file names and tags (as long as they're not newline characters, but that can't happen with regular use of the script as the newline is the separator). This means that, as of this change, editing or completely emptying the convert_filename filter should no longer pose any problems as far as bb.sh is concerned. The changes to adapt the code to the above are mainly: - Now that we do word splitting on newline only, we can go back to iterating through files in a "for" loop instead of using "read" with process substitution, which is more readable. However, to enable globbing locally, a technique adaptation is needed, like: for file in $(set +f; printf '%s\n' *.html) or for file in $(set +f; ls -t -- *.html) # sort by date, newest first Given IFS=$'\n' and globbing disabled globally, this technique is robust for all special characters in file names except for newlines. - invoke_editor() function replaces direct $EDITOR calls, because we need to locally word-split $EDITOR on spaces in case it contains arguments. - parse_file(): rewrite tag parsing to handle possible spaces in tags - tags_in_post(): output line-separated instead of space-separated tags; further adjust sed script to handle possible spaces in tags - rebuild_tags(): this function was refactored to use an array internally. Instead of two combined strings, it now takes HTML files and tags as separate arguments, separated by a single "--tags" argument. This allows for spaces and other special characters in both file names and tags. 
(See also commit a674ec5, which started this but didn't finish it).
1 parent d1a84d6 commit 54cc0c8

File tree

1 file changed

+81
-47
lines changed

1 file changed

+81
-47
lines changed

bb.sh

Lines changed: 81 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,18 @@
55
# https://github.com/carlesfe/bashblog/contributors
66
# Check out README.md for more details
77

8+
# Some shell settings for robustness by default. These help eliminate
9+
# unexpected snags and security vulnerabilities in case someone forgets to
10+
# quote a variable somewhere. They do require a few coding adaptations.
11+
12+
IFS=$'\n' # Globally, we do word splitting only on newline (which also
13+
# makes "$*" expand with newline separator instead of space).
14+
15+
set -f # Disable globbing (pathname expansion). It can be re-enabled
16+
# locally using 'set +f'; it's handy to do this in a subshell,
17+
# for example in $(command substitution), as the globbing will
18+
# be local to the subshell.
19+
820
# Global variables
921
# It is recommended to perform a 'rebuild' after changing any of this in the code
1022

@@ -252,6 +264,14 @@ get_html_file_content() {
252264
}"
253265
}
254266
267+
# Invoke the editor specified by the $EDITOR environment variable. Use a
268+
# function for this as we need to locally word-split $EDITOR on spaces
269+
# (in case it contains arguments, like EDITOR='joe -nobackups').
270+
invoke_editor() {
271+
local IFS=$' \t\n' # split on space, tab and newline inside this function only; the script sets IFS to newline globally
272+
$EDITOR "$1" # $EDITOR is deliberately unquoted so embedded arguments become separate words; $1 is the file to edit
273+
}
274+
255275
# Edit an existing, published .html file while keeping its original timestamp
256276
# Please note that this function does not automatically republish anything, as
257277
# it is usually called from 'main'.
@@ -270,7 +290,7 @@ edit() {
270290
touch_timestamp=$(LC_ALL=C date -r "${1%%.*}.html" +'%Y%m%d%H%M')
271291
tags_before=$(tags_in_post "${1%%.*}.html")
272292
if [[ $2 == full ]]; then
273-
$EDITOR "$1"
293+
invoke_editor "$1"
274294
filename=$1
275295
else
276296
if [[ ${1##*.} == md ]]; then
@@ -280,7 +300,7 @@ edit() {
280300
exit
281301
fi
282302
# editing markdown file
283-
$EDITOR "$1"
303+
invoke_editor "$1"
284304
TMPFILE=$(markdown "$1")
285305
filename=${1%%.*}.html
286306
else
@@ -290,7 +310,7 @@ edit() {
290310
get_post_title "$1" > "$TMPFILE"
291311
# Post text with plaintext tags
292312
get_html_file_content 'text' 'text' <"$1" | sed "/^<p>$template_tags_line_header/s|<a href='$prefix_tags\([^']*\).html'>\\1</a>|\\1|g" >> "$TMPFILE"
293-
$EDITOR "$TMPFILE"
313+
invoke_editor "$TMPFILE"
294314
filename=$1
295315
fi
296316
rm "$filename"
@@ -306,10 +326,10 @@ edit() {
306326
chmod 644 "$filename"
307327
echo "Posted $filename"
308328
tags_after=$(tags_in_post "$filename")
309-
relevant_tags=$(echo "$tags_before $tags_after" | tr ',' ' ' | tr ' ' '\n' | sort -u | tr '\n' ' ')
310-
if [[ ! -z $relevant_tags ]]; then
311-
relevant_posts="$(posts_with_tags $relevant_tags) $filename"
312-
rebuild_tags "$relevant_posts" "$relevant_tags"
329+
relevant_tags=$(sort -u <<< "$tags_before"$'\n'"$tags_after")
330+
if [[ -n $relevant_tags ]]; then
331+
relevant_posts=$(posts_with_tags $relevant_tags)$'\n'$filename
332+
rebuild_tags $relevant_posts --tags $relevant_tags
313333
fi
314334
}
315335

@@ -475,10 +495,11 @@ create_html_page() {
475495
parse_file() {
476496
# Read for the title and check that the filename is ok
477497
title=""
478-
while IFS='' read -r line; do
498+
while read -r line; do
479499
if [[ -z $title ]]; then
480500
# remove extra <p> and </p> added by markdown
481-
title=$(echo "$line" | sed 's/<\/*p>//g')
501+
title=${line#<p>}
502+
title=${title%</p>}
482503
if [[ -n $3 ]]; then
483504
filename=$3
484505
else
@@ -498,13 +519,14 @@ parse_file() {
498519
content=$filename.tmp
499520
# Parse possible tags
500521
elif [[ $line == "<p>$template_tags_line_header"* ]]; then
501-
tags=$(echo "$line" | cut -d ":" -f 2- | sed -e 's/<\/p>//g' -e 's/^ *//' -e 's/ *$//' -e 's/, /,/g')
502-
IFS=, read -r -a array <<< "$tags"
503-
504522
echo -n "<p>$template_tags_line_header " >> "$content"
505-
for item in "${array[@]}"; do
506-
echo -n "<a href='$prefix_tags$item.html'>$item</a>, "
507-
done | sed 's/, $/<\/p>/g' >> "$content"
523+
sed "s%</p>%%g
524+
s/^.*:[[:blank:]]*//
525+
s/[[:blank:]]\$//
526+
s/[[:blank:]]*,[[:blank:]]*/,/g
527+
s%\([^,]*\),%<a href='$prefix_tags\1.html'>\1</a>, %g
528+
s%, \([^,]*\)\$%, <a href='$prefix_tags\1.html'>\1</a></p>%
529+
" <<< "$line" >> "$content"
508530
else
509531
echo "$line" >> "$content"
510532
fi
@@ -565,7 +587,7 @@ EOF
565587
filename=""
566588
while [[ $post_status != "p" && $post_status != "P" ]]; do
567589
[[ -n $filename ]] && rm "$filename" # Delete the generated html file, if any
568-
$EDITOR "$TMPFILE"
590+
invoke_editor "$TMPFILE"
569591
if [[ $fmt == md ]]; then
570592
html_from_md=$(markdown "$TMPFILE")
571593
parse_file "$html_from_md"
@@ -607,8 +629,8 @@ EOF
607629
echo "Posted $filename"
608630
relevant_tags=$(tags_in_post $filename)
609631
if [[ -n $relevant_tags ]]; then
610-
relevant_posts="$(posts_with_tags $relevant_tags) $filename"
611-
rebuild_tags "$relevant_posts" "$relevant_tags"
632+
relevant_posts=$(posts_with_tags $relevant_tags)$'\n'$filename
633+
rebuild_tags $relevant_posts --tags $relevant_tags
612634
fi
613635
}
614636

@@ -623,7 +645,7 @@ all_posts() {
623645
{
624646
echo "<h3>$template_archive_title</h3>"
625647
prev_month=""
626-
while IFS='' read -r i; do
648+
for i in $(set +f; ls -t ./*.html); do
627649
is_boilerplate_file "$i" && continue
628650
echo -n "." 1>&3
629651
# Month headers
@@ -640,7 +662,7 @@ all_posts() {
640662
# Date
641663
date=$(LC_ALL=$date_locale date -r "$i" +"$date_format")
642664
echo " $date</li>"
643-
done < <(ls -t ./*.html)
665+
done
644666
echo "" 1>&3
645667
echo "</ul>"
646668
echo "<div id=\"all_posts\"><a href=\"./$index_file\">$template_archive_index_page</a></div>"
@@ -663,7 +685,7 @@ all_tags() {
663685
{
664686
echo "<h3>$template_tags_title</h3>"
665687
echo "<ul>"
666-
for i in $prefix_tags*.html; do
688+
for i in $(set +f; printf '%s\n' $prefix_tags*.html); do
667689
[[ -f "$i" ]] || break
668690
echo -n "." 1>&3
669691
nposts=$(grep -c "<\!-- text begin -->" "$i")
@@ -696,7 +718,8 @@ rebuild_index() {
696718
# Create the content file
697719
{
698720
n=0
699-
while IFS='' read -r i; do
721+
for i in $(set +f; ls -t ./*.html) # sort by date, newest first
722+
do
700723
is_boilerplate_file "$i" && continue;
701724
if ((n >= number_of_index_articles)); then break; fi
702725
if [[ -n $cut_do ]]; then
@@ -706,7 +729,7 @@ rebuild_index() {
706729
fi
707730
echo -n "." 1>&3
708731
n=$(( n + 1 ))
709-
done < <(ls -t ./*.html) # sort by date, newest first
732+
done
710733

711734
feed=$blog_feed
712735
if [[ -n $global_feedburner ]]; then feed=$global_feedburner; fi
@@ -723,9 +746,18 @@ rebuild_index() {
723746

724747
# Finds all tags referenced in one post.
725748
# Accepts either filename as first argument, or post content at stdin
726-
# Prints one line with space-separated tags to stdout
749+
# Prints tags to stdout, one per line.
750+
# (Since we're doing global IFS word splitting on newline only,
751+
# something like 'for tag in $(tags_in_post $i)' will work.)
727752
tags_in_post() {
728-
sed -n "/^<p>$template_tags_line_header/{s/^<p>$template_tags_line_header//;s/<[^>]*>//g;s/[ ,]\+/ /g;p;}" "$1" | tr ', ' ' '
753+
local newline=$'\n'
754+
sed -n "/^<p>$template_tags_line_header/ {
755+
s/^<p>$template_tags_line_header[[:blank:]]*//
756+
s/[[:blank:]]*<[^>]*>[[:blank:]]*//g
757+
s/[[:blank:]]*,[[:blank:]]*/,/g
758+
s/,\+/\\$newline/g
759+
p
760+
}" "$1"
729761
}
730762

731763
# Finds all posts referenced in a number of tags.
@@ -741,17 +773,15 @@ posts_with_tags() {
741773
# Rebuilds tag_*.html files
742774
# if no arguments given, rebuilds all of them
743775
# if arguments given, they should have this format:
744-
# "FILE1 [FILE2 [...]]" "TAG1 [TAG2 [...]]"
776+
# FILE1 [FILE2 [...]] --tags TAG1 [TAG2 [...]]
745777
# where FILEn are files with posts which should be used for rebuilding tags,
746778
# and TAGn are names of tags which should be rebuilt.
747779
# example:
748-
# rebuild_tags "one_post.html another_article.html" "example-tag another-tag"
749-
# mind the quotes!
780+
# rebuild_tags one_post.html another_article.html --tags example-tag another-tag
750781
rebuild_tags() {
751-
local IFS=$'\n' # word splitting only on newline; make $* expand with newline as separator
752782
if (($# < 1)); then
753783
# will process all files and tags
754-
files=( $(ls -t ./*.html) )
784+
files=( $(set +f; ls -t ./*.html) )
755785
all_tags=yes
756786
else
757787
# will process only given files and tags
@@ -765,7 +795,7 @@ rebuild_tags() {
765795
echo -n "Rebuilding tag pages "
766796
n=0
767797
if [[ -n $all_tags ]]; then
768-
rm -f ./"$prefix_tags"*.html
798+
( set +f; rm -f ./"$prefix_tags"*.html )
769799
else
770800
for i in "${tags[@]}"; do
771801
rm -f "./$prefix_tags$i.html"
@@ -792,12 +822,12 @@ rebuild_tags() {
792822
done
793823
rm "$tmpfile"
794824
# Now generate the tag files with headers, footers, etc
795-
while IFS='' read -r i; do
825+
for i in $(set +f; ls -t ./"$prefix_tags"*.tmp.html 2>/dev/null); do
796826
tagname=${i#./"$prefix_tags"}
797827
tagname=${tagname%.tmp.html}
798828
create_html_page "$i" "$prefix_tags$tagname.html" yes "$global_title &mdash; $template_tag_title \"$tagname\"" "$global_author"
799829
rm "$i"
800-
done < <(ls -t ./"$prefix_tags"*.tmp.html 2>/dev/null)
830+
done
801831
echo
802832
}
803833

@@ -821,11 +851,12 @@ get_post_author() {
821851
list_tags() {
822852
if [[ $2 == -n ]]; then do_sort=1; else do_sort=0; fi
823853

824-
ls ./$prefix_tags*.html &> /dev/null
825-
(($? != 0)) && echo "No posts yet. Use 'bb.sh post' to create one" && return
854+
if ! (set +f; set -- $prefix_tags*.html; [[ -e $1 ]]); then
855+
echo "No posts yet. Use 'bb.sh post' to create one"
856+
return
857+
fi
826858

827-
lines=""
828-
for i in $prefix_tags*.html; do
859+
for i in $(set +f; printf '%s\n' $prefix_tags*.html); do
829860
[[ -f "$i" ]] || break
830861
nposts=$(grep -c "<\!-- text begin -->" "$i")
831862
tagname=${i#"$prefix_tags"}
@@ -844,17 +875,19 @@ list_tags() {
844875

845876
# Displays a list of the posts
846877
list_posts() {
847-
ls ./*.html &> /dev/null
848-
(($? != 0)) && echo "No posts yet. Use 'bb.sh post' to create one" && return
878+
if ! (set +f; set -- *.html; [[ -e $1 ]]); then
879+
echo "No posts yet. Use 'bb.sh post' to create one"
880+
return
881+
fi
849882

850883
lines=""
851884
n=1
852-
while IFS='' read -r i; do
885+
for i in $(set +f; ls -t ./*.html); do
853886
is_boilerplate_file "$i" && continue
854887
line="$n # $(get_post_title "$i") # $(LC_ALL=$date_locale date -r "$i" +"$date_format")"
855888
lines+=$line\\n
856889
n=$(( n + 1 ))
857-
done < <(ls -t ./*.html)
890+
done
858891

859892
echo -e "$lines" | column -t -s "#"
860893
}
@@ -877,7 +910,7 @@ make_rss() {
877910
echo "<atom:link href=\"$global_url/$blog_feed\" rel=\"self\" type=\"application/rss+xml\" />"
878911

879912
n=0
880-
while IFS='' read -r i; do
913+
for i in $(set +f; ls -t ./*.html); do
881914
is_boilerplate_file "$i" && continue
882915
((n >= number_of_feed_articles)) && break # max 10 items
883916
echo -n "." 1>&3
@@ -891,7 +924,7 @@ make_rss() {
891924
echo "<pubDate>$(LC_ALL=C date -r "$i" +"%a, %d %b %Y %H:%M:%S %z")</pubDate></item>"
892925

893926
n=$(( n + 1 ))
894-
done < <(ls -t ./*.html)
927+
done
895928

896929
echo '</channel></rss>'
897930
} 3>&1 >"$rssfile"
@@ -989,7 +1022,8 @@ create_css() {
9891022
rebuild_all_entries() {
9901023
echo -n "Rebuilding all entries "
9911024

992-
for i in ./*.html; do # no need to sort
1025+
for i in $(set +f; printf '%s\n' *.html) # no need to sort
1026+
do
9931027
is_boilerplate_file "$i" && continue;
9941028
contentfile=.tmp.$RANDOM
9951029
while [[ -f $contentfile ]]; do contentfile=.tmp.$RANDOM; done
@@ -1042,7 +1076,7 @@ reset() {
10421076
echo "Are you sure you want to delete all blog entries? Please write \"Yes, I am!\" "
10431077
read -r line
10441078
if [[ $line == "Yes, I am!" ]]; then
1045-
rm .*.html ./*.html ./*.css ./*.rss &> /dev/null
1079+
(set +f; rm -f .*.html ./*.html ./*.css ./*.rss)
10461080
echo
10471081
echo "Deleted all posts, stylesheets and feeds."
10481082
echo "Kept your old '.backup.tar.gz' just in case, please delete it manually if needed."
@@ -1114,9 +1148,9 @@ do_main() {
11141148
fi
11151149

11161150
# Test for existing html files
1117-
if ls ./*.html &> /dev/null; then
1151+
if (set +f; set -- *.html; [[ -e $1 ]]); then
11181152
# We're going to back up just in case
1119-
tar -c -z -f ".backup.tar.gz" -- *.html &&
1153+
(set +f; tar -c -z -f ".backup.tar.gz" -- *.html) &&
11201154
chmod 600 ".backup.tar.gz"
11211155
elif [[ $1 == rebuild ]]; then
11221156
echo "Can't find any html files, nothing to rebuild"

0 commit comments

Comments
 (0)