diff options
author | V.Krishn <vkrishn4@gmail.com> | 2021-11-30 09:25:35 +0530 |
---|---|---|
committer | V.Krishn <vkrishn4@gmail.com> | 2021-11-30 09:25:35 +0530 |
commit | a89239a186e057690a76287e5267809282cc51c2 (patch) | |
tree | 1135c648cb41bd57c73e539d9862096422e91248 | |
parent | e86687d21b64a45354b956ead9b70b905b311ded (diff) | |
download | nbreader-a89239a186e057690a76287e5267809282cc51c2.tar.bz2 |
improve feedicon logic, fix minor bug
-rw-r--r-- | scripts/feedicon.sh | 60 | ||||
-rw-r--r-- | scripts/url.inc | 17 |
2 files changed, 53 insertions, 24 deletions
diff --git a/scripts/feedicon.sh b/scripts/feedicon.sh index 062166d..6cc3153 100644 --- a/scripts/feedicon.sh +++ b/scripts/feedicon.sh @@ -36,6 +36,7 @@ fi # Temp files localSHdr="$ICONTXTDIR/.site.hdr.txt" localIco="$ICONTXTDIR/.site.ico" +localIcoUrl="$ICONTXTDIR/.site.ico.url" localHtml="$ICONTXTDIR/.site.html" size_limit=102400; # 100Kb limit @@ -80,12 +81,12 @@ parse_feed_icon_url() { -e 's/\"//g' \ -e "s/'//g" \ -e "s/\=//" \ - -e "s/>.*$//g" \ - -e "s,/*$,," + -e "s/>.*$//g" ) ICONURL=$( echo "$ICONURL" | awk '{print $1}' ) - local no_proto=$(echo $ICONURL | grep -i '^\/\/') + local no_proto=$(echo $ICONURL | grep -i '^\/\/') # eg. //example.com/favicon.ico if [ "$no_proto" ]; then ICONURL='https:'${ICONURL}; fi # add protocol https + echo $ICONURL > $localIcoUrl # echo -e ${cYELLOW}'msg: base site icon url -> '${cNORMAL}${ICONURL} '...'; } @@ -138,7 +139,7 @@ check_icon_size() { if [ $size_limit -ge "$(($len))" -a 0 -lt "$(($len))" ]; then return 0; else - echo -e ${cRED}"msg: icon size too large or zero size"; + echo -e ${cRED}"msg: icon size too large or zero size"${cNORMAL}; return 1 fi } @@ -160,24 +161,41 @@ clean_temp_icon() { if [ -f "$localIco" ]; then rm -f "$localIco"; fi if [ -f "$localSHdr" ]; then rm -f "$localSHdr"; fi if [ -f "$localHtml" ]; then rm -f "$localHtml"; fi + if [ -s "$localIcoUrl" ]; then rm -f $localIcoUrl; fi } +# get "$url/favicon.ico" # (ico|png|jpeg|...) get_feedicon() { - # get "$url/favicon.ico" - parse_url $1 + parse_url $1 # get url parts BURL=${proto}${host} if [ ! "$BURL" ]; then return; fi - fetch_feedicon "$BURL/favicon.ico" - if is_file_ico $localIco; then - echo -e ${cYELLOW}'msg: favicon.ico is available - '${cGREEN}'download success'${cNORMAL}; - return; - fi + # 1. Use RSS url dirname variants (direct guess and fetch) + local u1=$(echo $url | sed -e "s,?.*$,,") + local fs=$(echo $u1 | grep -o '/' | wc -l) + seq $fs | while read s; do + echo $s --- $u1 + u1=$(dirname $u1) + fetch_feedicon "${proto}$u1/favicon.ico" + if is_file_ico $localIco; then + echo -e ${cYELLOW}'msg: favicon.ico is available - '${cGREEN}'download success'${cNORMAL} + return; + fi + done echo -e ${cRED}'msg: favicon.ico not available, retrying ...'${cNORMAL}; - get_site_base "$BURL" - parse_feed_icon_url + # 2. Try to extract from RSS url dirname variant pages + BURL=$(echo $url | sed -e "s,?.*$,,") + seq $fs | while read s; do + if [ ! "$ICONURL" ]; then + echo $s --- $BURL + BURL=${proto}$(dirname $BURL) + get_site_base "$BURL" + parse_feed_icon_url + fi + done + touch $localIcoUrl; ICONURL=$(cat $localIcoUrl) if [ ! "$ICONURL" ]; then echo -e ${cRED}'msg: shortcut icon not available'${cNORMAL}; return; @@ -185,16 +203,16 @@ get_feedicon() { is_furl=$(echo $ICONURL | grep -i '^http'); DATAURI=''; for u in $ICONURL; do # handle sites with multiple favicons + if is_datauri $u; then #is a datauri + DATAURI=$u; + echo -e ${cGREEN}'msg: shortcut datauri-icon download success'${cNORMAL}; + return; + fi if [ "$is_furl" ]; then fetch_feedicon $u else - if is_datauri $u; then #is a datauri - DATAURI=$u; - echo -e ${cGREEN}'msg: shortcut datauri-icon download success'${cNORMAL}; - return; - else - fetch_feedicon "$BURL/$u" - fi + BURL=${proto}${host} + fetch_feedicon "$BURL/$u" fi if is_file_ico $localIco; then echo -e ${cGREEN}'msg: shortcut icon download success'${cNORMAL}; @@ -249,6 +267,7 @@ update_feedicon() { get_feedicon $rssurl _make_datauri_file + if [ ! -s "$iconfile" ]; then return; fi update_icon_status "$URLSUM" '1' "$dbname" echo -e ${cGREEN}'feedicon::update-feedicon -> icon update done'${cNORMAL}; @@ -330,3 +349,4 @@ fi ## examples # update_feedicon '9a4a872c5eb377df7aa2c5feea4d02c6022264db' + diff --git a/scripts/url.inc b/scripts/url.inc index 7fdb55c..8403b1b 100644 --- a/scripts/url.inc +++ b/scripts/url.inc @@ -2,7 +2,7 @@ ## scrapped from somewhere on net ## modified by vkrishn@insteps.net -parse_url () { +parse_url() { if [ ! -n "$1" ]; then return; fi # extract the protocol @@ -17,13 +17,21 @@ parse_url () { user="$(echo $url | grep @ | cut -d@ -f1)" # extract the host - host=$(echo $url | sed -e s,$user@,,g -e s,?.*$,, | cut -d/ -f1) + host=$(echo $url | sed -e s,$user@,,g | cut -d/ -f1) # extract the path (if any) path="$(echo $url | grep / | cut -d/ -f2-)" + + if [ "$2" = "test" ]; then + echo proto = $proto + echo url = $url + echo user = $user + echo host = $host + echo path = $path + fi } -is_datauri () { +is_datauri() { # http://en.wikipedia.org/wiki/Data_URI_scheme # eg. href=data:image/gif;base64 # src=data:image/png;base64 @@ -32,6 +40,7 @@ is_datauri () { isHeader=0 # check header local header=$(echo $1 | grep -o -E '(^data:image/)(x-icon|png|gif|jpeg)(\;base64)') - if [ $header != "" ]; then isHeader=1; fi + if [ -n "$header" ]; then isDataUri=1; return 0; fi + return 1; } |