aboutsummaryrefslogtreecommitdiff
path: root/scripts/blocklist.php
blob: c0571b34cad83149c61a941ae880ab451da5fb76 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
<?php if (!defined('PmWiki')) exit();
/*  Copyright 2006-2024 Patrick R. Michaud (pmichaud@pobox.com)
    This file is part of PmWiki; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published
    by the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.  See pmwiki.php for full details.

    This script adds blocklisting capabilities to PmWiki, and can
    be enabled by simply setting the following in local/config.php:

        $EnableBlocklist = 1;

    With $EnableBlocklist set to 1, this module will search through
    the SiteAdmin.Blocklist page, as well as any other pages given by
    the $BlocklistPages variable, looking for lines of the
    form "block:some phrase" or "block:/regex/", with "some phrase" 
    and "/regex/" indicating things to be excluded from any 
    posting to the site.  

    In addition, if a page contains IP addresses of the form
    "a.b.c.d" or "a.b.c.*", then any posts coming from hosts
    matching the address will be blocked.

    There is also an "unblock:..." form, which removes an entry
    from the blocklist.  This is useful for removing specific
    block items in wikifarms and with automatically downloaded
    blocklists (below).

    The script also has the capability of automatically downloading
    blocklists from other sources, such as chongqed.org and
    the MoinMaster blocklist.  These are configured using
    the $BlocklistDownload array.  An $EnableBlocklist value
    of at least 10 configures PmWiki to automatically download
    these external blocklists and refresh them daily.

    More information about blocklists is available in the
    PmWiki.Blocklist page.
    
    Script maintained by Petko YOTOV www.pmwiki.org/petko
*/


##   Recipes sometimes update pages without going through the built-in
##   posting cycle.  When $EnableBlocklistImmediate is enabled (currently
##   the default, although that may change to disabled at some point),
##   the posted data is checked right away for every action listed in
##   $BlocklistActions.  If that check turns posting off, the post,
##   postattr and postedit request fields are discarded.
if (IsEnabled($EnableBlocklistImmediate, 1)) {
  SDVA($BlocklistActions, array('comment' => 1));
  $ptext = PRI(' ', @$_POST);
  if ($ptext) {
    if (@$BlocklistActions[$action]) {
      Blocklist($pagename, $ptext);
      if (!$EnablePost)
        unset($_POST['post'], $_POST['postattr'], $_POST['postedit']);
    }
  }
}


##   An $EnableBlocklist value of 10 or more registers the MoinMaster
##   list (a regex-format list) in $BlocklistDownload, unless the
##   administrator has already configured that entry.
if ($EnableBlocklist >= 10) {
  SDVA(
    $BlocklistDownload['SiteAdmin.Blocklist-MoinMaster'],
    array(
      'url' => 'https://moinmo.in/BadContent?action=raw',
      'format' => 'regex',
    )
  );
}


##   CheckBlocklist is inserted at the front of $EditFunctions, to
##   automatically check for blocks on anything being posted through
##   the normal "update a page" cycle.
array_unshift($EditFunctions, 'CheckBlocklist');

##   CheckBlocklist($pagename, &$page, &$new)
##     $pagename    - name of the page being posted to
##     $page, $new  - part of the signature, not used by this function
##   Joins the posted request fields into a single string and passes
##   it to Blocklist(), which decides whether posting gets disabled.
function CheckBlocklist($pagename, &$page, &$new) {
  StopWatch("CheckBlocklist: begin $pagename");
  ##  Use PRI() exactly as the $EnableBlocklistImmediate check does,
  ##  rather than implode(): implode() does not descend into array-valued
  ##  fields (name[]=...), it renders them as the literal "Array" with a
  ##  conversion warning, so their content would never be checked.
  ##  NOTE(review): PRI() is defined outside this file; presumably a
  ##  recursive implode -- confirm.
  $ptext = PRI(' ', @$_POST);
  if ($ptext) Blocklist($pagename, $ptext);
  StopWatch("CheckBlocklist: end $pagename");
}


##   Blocklist is the function that does all of the work of
##   checking for reasons to block a posting.  It reads
##   the available blocklist pages ($BlocklistPages) and
##   builds an array of strings and regular expressions to
##   be checked against the text; if any are found, then
##   posting is blocked (via $EnablePost=0, $IsBlocked=1).  The
##   function also checks the REMOTE_ADDR against any blocked
##   IP addresses.
##
##     $pagename - page being posted to; if it is itself one of the
##                 blocklist pages the function returns early so that
##                 blocklists can still be edited
##     $text     - the posted text to be checked
##
##   Nothing is returned; results are reported through the globals
##   $EnablePost, $IsBlocked, $WhyBlockedFmt and $MessagesFmt.
function Blocklist($pagename, $text) {
  global $BlocklistPages, $BlockedMessagesFmt, $BlocklistDownload,
    $BlocklistDownloadRefresh, $Now, $EnablePost, $WhyBlockedFmt,
    $MessagesFmt, $BlocklistMessageFmt, $EnableWhyBlocked, $IsBlocked;

  StopWatch("Blocklist: begin $pagename");

  $BlocklistDownload = (array)@$BlocklistDownload;
  SDV($BlocklistPages, 
    array_merge(array('$SiteAdminGroup.Blocklist', 
                      '$SiteAdminGroup.Blocklist-Farm'),
                array_keys($BlocklistDownload)));
  SDV($BlocklistMessageFmt, "<h3 class='wikimessage'>$[This post has been blocked by the administrator]</h3>");
  SDVA($BlockedMessagesFmt, array(
    'ip' => '$[Address blocked from posting]: ',
    'text' => '$[Text blocked from posting]: '));
  SDV($BlocklistDownloadRefresh, 86400);

  ##  Build the pattern for the poster's address once, it is the same
  ##  for every blocklist page.  For 1.2.3.4 it matches "1.2.3.4" as
  ##  well as "1.2.3.*".  Without a REMOTE_ADDR the pattern would
  ##  degenerate to /\b/, which matches almost any page text and would
  ##  block the post, so in that case the address check is skipped.
  $remote = (string)@$_SERVER['REMOTE_ADDR'];
  $ippat = '';
  if ($remote !== '') {
    $ip = preg_quote($remote, '/');
    $ip = preg_replace('/\\d+$/', '($0\\b|\\*)', $ip);
    $ippat = "/\\b$ip/";
  }

  ##  Terms collected from all of the blocklist pages
  $terms = array('block' => array(), 'unblock' => array());

  ##  Loop over all blocklist pages
  foreach((array)$BlocklistPages as $b) {

    ##  load the current blocklist page
    $pn = FmtPageName($b, $pagename);
    $page = ReadPage($pn, READPAGE_CURRENT);
    if (!$page) continue;

    ##  if the page being checked is a blocklist page, stop blocking
    if ($pagename == $pn) return;

    ##  If the blocklist page is managed by automatic download,
    ##  schedule any new downloads here.  The download itself runs
    ##  in a shutdown function, which receives the current directory.
    if (@$BlocklistDownload[$pn]) {
      $bd = &$BlocklistDownload[$pn];
      SDVA($bd, array(
        'refresh' => $BlocklistDownloadRefresh,
        'url' => "http://www.pmwiki.org/blocklists/$pn" ));
      if (!@$page['text'] || $page['time'] < $Now - $bd['refresh'])
        register_shutdown_function('BlocklistDownload', $pn, getcwd());
    }

    if (!@$page['text']) continue;

    ##  If the blocklist is simply a list of regexes to be matched, load 
    ##  them into $terms['block'] and continue to the next blocklist page.
    ##  Some regexes from remote sites aren't well-formed, so we have
    ##  to escape any slashes that aren't already escaped.
    if (strpos(@$page['text'], 'blocklist-format: regex') !==false) {
      if (preg_match_all('/^([^\\s#].+)/m', $page['text'], $match)) 
        foreach($match[0] as $m) {
          $m = preg_replace('#(?<!\\\\)/#', '\\/', trim($m));
          $terms['block'][] = "/$m/";
        }
      continue;
    }

    ##  Treat the page as a pmwiki-format blocklist page, with
    ##  IP addresses and "block:"-style declarations.  First, see
    ##  if we need to block the author based on a.b.c.d or a.b.c.*
    ##  IP addresses.
    if ($ippat !== '' && preg_match($ippat, @$page['text'], $match)) {
      $EnablePost = 0;
      $IsBlocked = 1;
      $WhyBlockedFmt[] = $BlockedMessagesFmt['ip'] . $match[0];
    }

    ##  Now we'll load any "block:" or "unblock:" specifications
    ##  from the page text ("regex:" is accepted as a synonym of "block:").
    if (preg_match_all('/(un)?(?:block|regex):(.*)/', @$page['text'], 
                       $match, PREG_SET_ORDER)) 
      foreach($match as $m) $terms[$m[1].'block'][] = trim($m[2]);
  }

  ##  okay, we've loaded all of the terms, now subtract any 'unblock'
  ##  terms from the block set.
  StopWatch("Blocklist: diff unblock");
  $blockterms = array_diff($terms['block'], $terms['unblock']);

  ##  go through each of the remaining blockterms and see if it matches the
  ##  text -- if so, disable posting and add a message to $WhyBlockedFmt.
  ##  Terms starting with "/" are regexes, anything else is a
  ##  case-insensitive substring.
  StopWatch('Blocklist: blockterms (count='.count($blockterms).')');
  $itext = strtolower($text);
  foreach($blockterms as $b) {
    ##  An empty term (a bare "block:" line) carries no information.
    ##  $b[0] would raise a warning on it, and since PHP 8 strpos()
    ##  finds an empty needle at offset 0, which would block every post.
    if ($b === '') continue;
    if ($b[0] == '/') {
      if (!preg_match($b, $text)) continue;
    } else if (strpos($itext, strtolower($b)) === false) continue;
    $EnablePost = 0;
    $IsBlocked = 1;
    $WhyBlockedFmt[] = $BlockedMessagesFmt['text'] . $b;
  }
  StopWatch('Blocklist: blockterms done');

  ##  If we came across any reasons to block, let's provide a message
  ##  to the author that it was blocked.  If $EnableWhyBlocked is set,
  ##  we'll even tell the author why.  :-)
  if (@$WhyBlockedFmt) {
    $MessagesFmt[] = $BlocklistMessageFmt;
    if (IsEnabled($EnableWhyBlocked, 0)) 
      foreach((array)$WhyBlockedFmt as $why) 
        $MessagesFmt[] = "<pre class='blocklistmessage'>$why</pre>\n";
  }
  StopWatch("Blocklist: end $pagename");
}


##   BlocklistDownload() handles retrieving blocklists from
##   external sources into PmWiki pages.  If it's able to
##   download an updated list, it uses that; otherwise it leaves
##   any existing list alone.
##
##     $pagename - blocklist page to refresh; also the key of its
##                 entry in $BlocklistDownload ('url', 'format')
##     $dir      - directory to chdir() into before doing any work;
##                 Blocklist() registers this function for shutdown
##                 and passes getcwd() here
##
##   The page is written in every case, whether or not new data
##   was retrieved.
function BlocklistDownload($pagename, $dir = '') {
  global $BlocklistDownloadFmt, $BlocklistDownload, $FmtV;

  ##  NOTE(review): presumably needed because the working directory
  ##  can differ once shutdown functions run -- confirm.
  if ($dir) { flush(); chdir($dir); }
  SDV($BlocklistDownloadFmt, "
  [@
## blocklist-note:   NOTE: This page is automatically generated by blocklist.php
## blocklist-note:   NOTE: Any edits to this page may be lost!
## blocklist-url:    \$BlocklistDownloadUrl
## blocklist-when:   \$CurrentTimeISO
#  blocklist-format: \$BlocklistFormat
\$BlocklistData
  @]
");

  ##  get the existing blocklist page
  $bd = &$BlocklistDownload[$pagename];
  $page = ReadPage($pagename, READPAGE_CURRENT);
  ##  make sure we have an array to add keys to, whatever ReadPage gave us
  if (!is_array($page)) $page = array();

  ##  try to retrieve the remote data
  $blocklistdata = @file($bd['url']);

  ##  if we didn't get it, and we don't already have text, save a
  ##  note in the page so we know what happened
  if (!$blocklistdata && !@$page['text']) {
    $auf = ini_get('allow_url_fopen');
    $blocklistdata = "#### Unable to download blocklist (allow_url_fopen=$auf)";
  }

  ##  if we have some new text to save, let's format it and save it
  if ($blocklistdata) {
    $blocklistdata = implode('', (array)$blocklistdata);
    ##  drop any "##blocklist" lines that came with the downloaded data
    $blocklistdata = preg_replace('/^##blocklist.*/m', '', $blocklistdata);
    $FmtV['$BlocklistData'] = $blocklistdata;
    $FmtV['$BlocklistDownloadUrl'] = $bd['url'];
    ##  'format' is optional: the defaults applied in Blocklist() only
    ##  cover 'refresh' and 'url', so avoid reading an undefined key
    $FmtV['$BlocklistFormat'] = isset($bd['format']) ? $bd['format'] : '';
    $page['text'] = FmtPageName($BlocklistDownloadFmt, $pagename);
    SDV($page['passwdread'], '@lock');
  }

  ##  save our updated(?) blocklist page
  WritePage($pagename, $page);
}