about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author V.Krishn <vkrishn4@gmail.com> 2021-11-30 09:25:35 +0530
committer V.Krishn <vkrishn4@gmail.com> 2021-11-30 09:25:35 +0530
commit a89239a186e057690a76287e5267809282cc51c2 (patch)
tree 1135c648cb41bd57c73e539d9862096422e91248
parent e86687d21b64a45354b956ead9b70b905b311ded (diff)
download nbreader-a89239a186e057690a76287e5267809282cc51c2.tar.bz2
improve feedicon logic, fix minor bug
-rw-r--r-- scripts/feedicon.sh 60
-rw-r--r-- scripts/url.inc 17
2 files changed, 53 insertions, 24 deletions
diff --git a/scripts/feedicon.sh b/scripts/feedicon.sh
index 062166d..6cc3153 100644
--- a/scripts/feedicon.sh
+++ b/scripts/feedicon.sh
@@ -36,6 +36,7 @@ fi
# Temp files
localSHdr="$ICONTXTDIR/.site.hdr.txt"
localIco="$ICONTXTDIR/.site.ico"
+localIcoUrl="$ICONTXTDIR/.site.ico.url"
localHtml="$ICONTXTDIR/.site.html"
size_limit=102400; # 100Kb limit
@@ -80,12 +81,12 @@ parse_feed_icon_url() {
-e 's/\"//g' \
-e "s/'//g" \
-e "s/\=//" \
- -e "s/>.*$//g" \
- -e "s,/*$,,"
+ -e "s/>.*$//g"
)
ICONURL=$( echo "$ICONURL" | awk '{print $1}' )
- local no_proto=$(echo $ICONURL | grep -i '^\/\/')
+ local no_proto=$(echo $ICONURL | grep -i '^\/\/') # eg. //example.com/favicon.ico
if [ "$no_proto" ]; then ICONURL='https:'${ICONURL}; fi # add protocol https
+ echo $ICONURL > $localIcoUrl
# echo -e ${cYELLOW}'msg: base site icon url -> '${cNORMAL}${ICONURL} '...';
}
@@ -138,7 +139,7 @@ check_icon_size() {
if [ $size_limit -ge "$(($len))" -a 0 -lt "$(($len))" ]; then
return 0;
else
- echo -e ${cRED}"msg: icon size too large or zero size";
+ echo -e ${cRED}"msg: icon size too large or zero size"${cNORMAL};
return 1
fi
}
@@ -160,24 +161,41 @@ clean_temp_icon() {
if [ -f "$localIco" ]; then rm -f "$localIco"; fi
if [ -f "$localSHdr" ]; then rm -f "$localSHdr"; fi
if [ -f "$localHtml" ]; then rm -f "$localHtml"; fi
+ if [ -s "$localIcoUrl" ]; then rm -f $localIcoUrl; fi
}
+# get "$url/favicon.ico" # (ico|png|jpeg|...)
get_feedicon() {
- # get "$url/favicon.ico"
- parse_url $1
+ parse_url $1 # get url parts
BURL=${proto}${host}
if [ ! "$BURL" ]; then return; fi
- fetch_feedicon "$BURL/favicon.ico"
- if is_file_ico $localIco; then
- echo -e ${cYELLOW}'msg: favicon.ico is available - '${cGREEN}'download success'${cNORMAL};
- return;
- fi
+ # 1. Use RSS url dirname variants (direct guess and fetch)
+ local u1=$(echo $url | sed -e "s,?.*$,,")
+ local fs=$(echo $u1 | grep -o '/' | wc -l)
+ seq $fs | while read s; do
+ echo $s --- $u1
+ u1=$(dirname $u1)
+ fetch_feedicon "${proto}$u1/favicon.ico"
+ if is_file_ico $localIco; then
+ echo -e ${cYELLOW}'msg: favicon.ico is available - '${cGREEN}'download success'${cNORMAL}
+ return;
+ fi
+ done
echo -e ${cRED}'msg: favicon.ico not available, retrying ...'${cNORMAL};
- get_site_base "$BURL"
- parse_feed_icon_url
+ # 2. Try to extract from RSS url dirname variant pages
+ BURL=$(echo $url | sed -e "s,?.*$,,")
+ seq $fs | while read s; do
+ if [ ! "$ICONURL" ]; then
+ echo $s --- $BURL
+ BURL=${proto}$(dirname $BURL)
+ get_site_base "$BURL"
+ parse_feed_icon_url
+ fi
+ done
+ touch $localIcoUrl; ICONURL=$(cat $localIcoUrl)
if [ ! "$ICONURL" ]; then
echo -e ${cRED}'msg: shortcut icon not available'${cNORMAL};
return;
@@ -185,16 +203,16 @@ get_feedicon() {
is_furl=$(echo $ICONURL | grep -i '^http'); DATAURI='';
for u in $ICONURL; do # handle sites with multiple favicons
+ if is_datauri $u; then #is a datauri
+ DATAURI=$u;
+ echo -e ${cGREEN}'msg: shortcut datauri-icon download success'${cNORMAL};
+ return;
+ fi
if [ "$is_furl" ]; then
fetch_feedicon $u
else
- if is_datauri $u; then #is a datauri
- DATAURI=$u;
- echo -e ${cGREEN}'msg: shortcut datauri-icon download success'${cNORMAL};
- return;
- else
- fetch_feedicon "$BURL/$u"
- fi
+ BURL=${proto}${host}
+ fetch_feedicon "$BURL/$u"
fi
if is_file_ico $localIco; then
echo -e ${cGREEN}'msg: shortcut icon download success'${cNORMAL};
@@ -249,6 +267,7 @@ update_feedicon() {
get_feedicon $rssurl
_make_datauri_file
+ if [ ! -s "$iconfile" ]; then return; fi
update_icon_status "$URLSUM" '1' "$dbname"
echo -e ${cGREEN}'feedicon::update-feedicon -> icon update done'${cNORMAL};
@@ -330,3 +349,4 @@ fi
## examples
# update_feedicon '9a4a872c5eb377df7aa2c5feea4d02c6022264db'
+
diff --git a/scripts/url.inc b/scripts/url.inc
index 7fdb55c..8403b1b 100644
--- a/scripts/url.inc
+++ b/scripts/url.inc
@@ -2,7 +2,7 @@
## scrapped from somewhere on net
## modified by vkrishn@insteps.net
-parse_url () {
+parse_url() {
if [ ! -n "$1" ]; then return; fi
# extract the protocol
@@ -17,13 +17,21 @@ parse_url () {
user="$(echo $url | grep @ | cut -d@ -f1)"
# extract the host
- host=$(echo $url | sed -e s,$user@,,g -e s,?.*$,, | cut -d/ -f1)
+ host=$(echo $url | sed -e s,$user@,,g | cut -d/ -f1)
# extract the path (if any)
path="$(echo $url | grep / | cut -d/ -f2-)"
+
+ if [ "$2" = "test" ]; then
+ echo proto = $proto
+ echo url = $url
+ echo user = $user
+ echo host = $host
+ echo path = $path
+ fi
}
-is_datauri () {
+is_datauri() {
# http://en.wikipedia.org/wiki/Data_URI_scheme
# eg. href=data:image/gif;base64
# src=data:image/png;base64
@@ -32,6 +40,7 @@ is_datauri () {
isHeader=0
# check header
local header=$(echo $1 | grep -o -E '(^data:image/)(x-icon|png|gif|jpeg)(\;base64)')
- if [ $header != "" ]; then isHeader=1; fi
+ if [ -n "$header" ]; then isDataUri=1; return 0; fi
+ return 1;
}