Jump to content

User:Dbroadwell/php

From Wikipedia, the free encyclopedia
<?php
/*
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

Written by Tomer Chachamu and David Broadwell

These functions require curl with configured cookieconf.txt
Contents of cookieconf.txt:

-b ../prot/cookie.txt
-c ../prot/cookie.txt

--max-time 50

Where ../prot/cookie.txt is the path to a file which will never be served to the public (e.g. on a webserver)
Check permissions on this file or the directory. curl (running as the script) must be able to read and write to it.
External executable curl is used, not libcurl as documented at http://uk2.php.net/curl
*/

// Character set the target wiki serves pages in.
// The supported 'SITECHARSET' encodings are ISO-8859-1, UTF-8 and US-ASCII.
// gettext() converts exported text away from UTF-8 when this is anything else,
// and geteditpage() uses it for XML parsing and entity decoding.
define('SITECHARSET','ISO-8859-1');

// Files and servers.
// ALLTITLES: list of article titles, read by templaterun() via filetoarray().
// SERVER: scheme + host every request URL is built from (no trailing slash).
define('ALLTITLES','all_titles_in_ns0.txt');
define('SERVER','http://en.wikipedia.org');
define('SITENAME','enwiki'); // Not yet used; please use the internal names for Wikimedia Foundation projects (see [[meta:Stewards]])

// Bot account data: replace each '_' placeholder with the real value.
// USERID (from Special:Preferences) is not required, but suggested.
// NOTE(review): PASSWORD is stored in plain text here - keep this file out of
// any publicly served directory.
define('OWNER','_');
define('USERID','_');
define('USERNAME','_');
define('PASSWORD','_');

// Specific bot values.
// COOKIE: curl config file (see the header comment for its expected contents).
// BOTNAME: name shown in edit summaries and in the suggestions heading.
// TRIGGER / POST: template page that requests processing and the page name it
// is swapped for once an article has been handled.
// OLDTRIGGER: reference revision of the trigger page; only mentioned in the
// commented-out pseudo-code of checktemplate().
// NOTE(review): TRIGGER/POST spell the user name 'dbroadwell' while OLDTRIGGER
// uses 'Dbroadwell' - confirm the difference in case is intended.
// NOTE(review): TEMPLATEPATH is not referenced by any function visible in this file.
define('COOKIE','cookieconf.txt');
define('BOTNAME','link(b)ot');
define('TEMPLATEPATH','/wiki/');
define('TRIGGER','User:dbroadwell/linkthis');
define('POST','User:dbroadwell/linked');
define('OLDTRIGGER','User:Dbroadwell/linkthis&oldid=11155247');

function gettext($title) {
    // Fetches the current revision of $title from SERVER via [[Special:Export]]
    // (top article layer only).
    //
    // Returns array($text, $contributor):
    //   $text        - wikitext of the page, converted from UTF-8 when
    //                  SITECHARSET is not UTF-8; '' when it could not be read
    //   $contributor - value of the first USERNAME element in the export;
    //                  '' when the export contains none
    // Use list($text, ) = gettext($title); when only the text is needed.
    //
    // NOTE(review): PHP's gettext extension also declares a function named
    // gettext(); if that extension is loaded this declaration will collide
    // with it - confirm the target environment.

    // Escape the title for use in a URL path, but keep ':' and '/' readable
    // so namespace prefixes and subpages still resolve.
    $path = rawurlencode(str_replace(' ', '_', $title));
    $path = str_replace(array('%2F', '%3A'), array('/', ':'), $path);

    $xml = file_get_contents(SERVER . '/wiki/Special:Export/' . $path);
    if ($xml === false) {
        trigger_error("Could not fetch Special:Export for $title", E_USER_WARNING);
        return array('', '');
    }

    $val = array();
    $ind = array();
    xml_parse_into_struct(xml_parser_create('UTF-8'), $xml, $val, $ind);

    // Tag names are upper-cased by the parser's default case folding.
    $text = '';
    if (isset($ind['TEXT'][0]) && isset($val[$ind['TEXT'][0]]['value'])) {
        $text = $val[$ind['TEXT'][0]]['value'];
    }
    $contributor = '';
    if (isset($ind['USERNAME'][0]) && isset($val[$ind['USERNAME'][0]]['value'])) {
        $contributor = $val[$ind['USERNAME'][0]]['value'];
    }

    if (SITECHARSET != 'UTF-8') {
        // NOTE(review): only the text is converted, as in the original code;
        // the contributor name stays UTF-8 - confirm that is what callers want.
        $text = utf8_decode($text);
    }
    return array($text, $contributor);
}

function initeditpage() {
    // Logs the bot account in through Special:Userlogin using the external
    // curl binary, so that the session cookie is stored as configured in the
    // COOKIE config file. Run once before the first geteditpage() call.
    // Returns nothing.
    // Untested Function
    //
    // NOTE(review): the guard tests for the curl *config* file (COOKIE), not
    // for the cookie jar named inside it, so the login is only attempted when
    // the config file is missing. That is the original behaviour and is kept
    // here, but it is probably not what was intended - confirm.
    // NOTE(review): this URL uses /wiki/index.php while the edit functions use
    // /w/index.php - confirm which script path the server expects.
    // NOTE(review): the password is passed on the curl command line and may be
    // visible to other local users in the process list.
    $cookie = COOKIE;
    if (!file_exists($cookie)) {
        // Credentials are URL-encoded so characters such as '&' or '+' in
        // them cannot corrupt the form body.
        $form = 'wpName=' . urlencode(USERNAME)
              . '&wpLoginattempt=Log+in'
              . '&wpPassword=' . urlencode(PASSWORD)
              . '&wpRemember=1&wpRetype=&wpEmail=';
        $url = SERVER . '/wiki/index.php?title=Special:Userlogin&action=submit';
        // Every argument is shell-escaped; the original single-quoted command
        // left $user/$pass/$serv for the shell to expand (to nothing).
        exec('curl --silent -d ' . escapeshellarg($form)
            . ' --url ' . escapeshellarg($url)
            . ' --config ' . escapeshellarg($cookie));
    }
}

function geteditpage($title) {
    // Downloads the edit form of $title from SERVER with the external curl
    // binary (using the COOKIE config file) and extracts what is needed to
    // submit an edit.
    //
    // Returns array('text' => ..., 'attr' => ...):
    //   'text' - decoded contents of the form's <textarea> ('' if none found)
    //   'attr' - name => value map of the form's <input> fields, which must be
    //            passed on to posteditpage()
    //
    // When no <input> element can be parsed, the raw response is written to
    // dumps/temp<random>.html and a notice is raised; 'attr' is then empty.
    $title = rawurlencode(str_replace(' ', '_', $title));
    $url = SERVER . "/w/index.php?title=$title&action=edit";
    // Arguments are shell-escaped; the original single-quoted command left
    // $serv and $title for the shell to expand (to nothing).
    $req = (string) shell_exec('curl --silent --url ' . escapeshellarg($url)
        . ' --config ' . escapeshellarg(COOKIE));

    $val = array();
    $ind = array();
    xml_parse_into_struct(xml_parser_create(SITECHARSET), $req, $val, $ind);

    $attr = array();
    if (isset($ind['INPUT']) && is_array($ind['INPUT'])) {
        foreach ($ind['INPUT'] as $num) {
            $fields = isset($val[$num]['attributes']) ? $val[$num]['attributes'] : array();
            if (isset($fields['NAME'])) {
                // Inputs without a value attribute are submitted as empty.
                $attr[$fields['NAME']] = isset($fields['VALUE']) ? $fields['VALUE'] : '';
            }
        }
    } else {
        // Keep the unparsable response for inspection.
        $fna = 'dumps/temp' . mt_rand() . '.html';
        trigger_error("Dump for following error is at $fna", E_USER_NOTICE);
        file_put_contents($fna, $req);
    }

    // Take everything between the end of the opening <textarea ...> tag and
    // the closing </textarea>.
    $text = '';
    $open = strpos($req, '<textarea');
    if ($open !== false) {
        $bodystart = strpos($req, '>', $open);
        $close = strpos($req, '</textarea>', $open);
        if ($bodystart !== false && $close !== false && $bodystart < $close) {
            $raw = substr($req, $bodystart + 1, $close - $bodystart - 1);
            $text = html_entity_decode($raw, ENT_QUOTES, SITECHARSET);
        }
    }

    // Unsets remove what a browser would not have submitted.
    unset($attr['search']);
    unset($attr['go']);
    unset($attr['fulltext']);
    return array('text' => $text, 'attr' => $attr);
}

function posteditpage($article, $attr, $newtext, $summary) {
    // Submits an edit of $article to SERVER with the external curl binary.
    //
    //   $article - page title (spaces are converted to underscores)
    //   $attr    - form fields returned by geteditpage() for the same page
    //   $newtext - complete new wikitext of the page
    //   $summary - edit summary
    //
    // Returns curl's output (the server response), or null when the request
    // could not be prepared or produced no output.
    $title = urlencode(str_replace(' ', '_', $article));
    $url = SERVER . "/w/index.php?title=$title&action=submit";

    if (!is_array($attr)) {
        $attr = array();
    }
    $attr['wpSummary'] = $summary;
    $attr['wpTextbox1'] = $newtext;
    // Unsets things that only affect human accounts
    unset($attr['wpPreview']);
    unset($attr['wpWatchthis']);

    $pairs = array();
    foreach ($attr as $name => $val) {
        $pairs[] = urlencode($name) . '=' . urlencode((string) $val);
    }
    $data = implode('&', $pairs);

    // The form body is handed to curl through a private temporary config
    // file: it can be far too long for a command line, and it contains the
    // edit token, so it should not sit in a predictable, shared location.
    $fna = tempnam(sys_get_temp_dir(), 'temp');
    if ($fna === false || file_put_contents($fna, "--data $data") === false) {
        trigger_error("Could not write curl data file for $article", E_USER_WARNING);
        if ($fna !== false) {
            unlink($fna);
        }
        return null;
    }

    // Arguments are shell-escaped; the original single-quoted command left
    // $fna, $serv and $title for the shell to expand (to nothing).
    $req = shell_exec('curl -sS --config ' . escapeshellarg($fna)
        . ' --url ' . escapeshellarg($url)
        . ' --config ' . escapeshellarg(COOKIE));
    unlink($fna);
    return $req;
}

function filetostring($filename) {
    // Utility: loads the whole contents of $filename and returns them as a
    // string. An empty file yields '' (the fread()/filesize() version failed
    // on a zero length). Returns false, after raising a warning, when the
    // file cannot be read.
    $datastring = file_get_contents($filename);
    if ($datastring === false) {
        trigger_error("Could not read $filename", E_USER_WARNING);
    }
    return $datastring;
}

function filetoarray($filename) {
    // Utility: loads $filename into an array, one element per line, with
    // trailing whitespace and newlines stripped from every line.
    // Returns an empty array for an empty file, and also (after a warning)
    // when the file cannot be opened.
    $dataarray = array();
    $handle = fopen($filename, 'r');
    if ($handle === false) {
        trigger_error("Could not open $filename", E_USER_WARNING);
        return $dataarray;
    }
    // Compare against false explicitly: a final line consisting of "0" is a
    // valid line but evaluates as false in a plain truth test.
    while (($buf = fgets($handle)) !== false) {
        $dataarray[] = rtrim($buf);
    }
    fclose($handle);
    return $dataarray;
}

function newsummary($article,$count) {
    // Builds the edit summary for a bot edit.
    //   $article - name of the article the suggestions are for
    //   $count   - number of suggestions made
    // The bot name, owner and trigger page come from the BOTNAME, OWNER and
    // TRIGGER constants. Returns the summary as a single string.
    return sprintf(
        '%s suggestions for %s posted by %s. Contact %s if needed or blank %s to stop article processing.',
        $count,
        $article,
        BOTNAME,
        OWNER,
        TRIGGER
    );
}

function gethtml($article) {
    // Fetches the Special:Whatlinkshere page for $article from SERVER with
    // the external curl binary (using the COOKIE config file) and returns
    // only the part between the '<!-- start content -->' and
    // '<!-- end content -->' markers, i.e. without page header and footer.
    //   $article - page title (spaces are converted to underscores)
    $title = urlencode(str_replace(' ', '_', $article));
    $url = SERVER . "/w/index.php?title=Special:Whatlinkshere&target=$title";
    // Arguments are shell-escaped; the original single-quoted command left
    // $serv and $title for the shell to expand (to nothing).
    $data = (string) shell_exec('curl --silent --url ' . escapeshellarg($url)
        . ' --config ' . escapeshellarg(COOKIE));
    // magic numbers for getting the article out of html
    $begindata = '<!-- start content -->';
    $enddata = '<!-- end content -->';
    return pullcore($begindata, $enddata, $data);
}

function pullcore($start,$stop,$text) {
    // Utility: returns the part of $text that lies between the first
    // occurrence of $start and the next occurrence of $stop after it
    // (both markers excluded).
    // Returns '' when either marker is missing; the unchecked version
    // returned an arbitrary slice of $text in that case.
    $startpos = strpos($text, $start);
    if ($startpos === false) {
        return '';
    }
    $startpos += strlen($start);
    // Search for the end marker only after the start marker, so a stray
    // earlier $stop cannot produce a negative length.
    $stoppos = strpos($text, $stop, $startpos);
    if ($stoppos === false) {
        return '';
    }
    return (string) substr($text, $startpos, $stoppos - $startpos);
}

function parselinks($data) {
    // Parses the html output of Special:Whatlinkshere and returns an array
    // of the page titles found in title="..." attributes of the link list.
    //   $data - html as returned by gethtml()
    //
    // Only the region between the first '<ul>' and the following '</ul>' is
    // searched. The original computed that region but then ran the regex on
    // the whole of $data, which also picks up any other link in the page.
    // NOTE(review): if the list contains nested <ul> elements the region ends
    // at the first inner '</ul>' - verify against real Whatlinkshere output.

    // magic numbers for Special:Whatlinkshere
    $begindata = '<ul>';
    $enddata = '</ul>';
    $text = pullcore($begindata, $enddata, $data);
    // for ns0 matches only use: '/title="([^":]+)"/'
    $temp = array();
    preg_match_all('/title="([^"]+)"/', $text, $temp);
    // $temp[0] holds the full matches, $temp[1] the captured titles.
    return isset($temp[1]) ? $temp[1] : array();
}

function discoverlinks($articletext,$articlelist) {
    // Searches $articletext for every name in $articlelist.
    //   $articletext - string to search in
    //   $articlelist - array of article names to look for
    // Returns an array of the names that occur in the text, in list order;
    // an empty array when nothing matches.
    $articletodo = array(); // must be an array: [] cannot be applied to a string
    foreach ($articlelist as $articlename) {
        if ((string) $articlename === '') {
            continue; // an empty needle would "match" everything
        }
        // strpos() returns 0 for a match at the very start of the text, so
        // the result has to be compared against false explicitly.
        if (strpos($articletext, (string) $articlename) !== false) {
            $articletodo[] = $articlename;
        }
    }
    return $articletodo;
}

function templateswap($article,$trigger,$post) {
    // Utility: replaces every occurrence of $trigger in the text $article
    // with $post and returns the resulting text.
    //   $article - text to work on
    //   $trigger - template name to look for
    //   $post    - replacement name; when empty, the whole '{{trigger}}'
    //              call including its braces is removed instead
    if (!$post) { // No post, remove {{}} too ...
        $trigger = '{{' . $trigger . '}}';
    }
    return str_replace($trigger, (string) $post, $article);
}

function checktemplate($trigger) {
    // Decides whether the bot may keep running, based on the state of the
    // $trigger template page: true means continue, false means stop.
    //
    // Status: placeholder. No check is implemented yet, so every call
    // answers true. $trigger is currently unused.
    //
    // Planned checks (untested pseudo-code from the original author; it
    // would also need a not-yet-written getcontributor($article)):
    /*
    $historictrigger = OLDTRIGGER;
    // Build with strip(gethtml($historictrigger));
    // might need to replace ' " ' ' and \n with _

    // 1. Detect a blanking of more than 30% of the template
    $lastsize = 517; // size of the template at the author's last save
    $currsize = strlen(gethtml($trigger));
    if ($currsize < $lastsize) { return false; }

    // 2. Compare the levenshtein distance of the current text against the
    //    historic revision (gettext() may be broken for this usage)
    $lastleven = levenshtein($historictrigger);
    $deviation = (levenshtein(gettext($trigger)) + $lastleven) / 2;
    if ($deviation > 10%) { return false; }
    // i.e. use levenshtein indexes from the history entry of the linkthis
    // template against the levenshtein index of the current entry
    */
    return true; // just pass until implementation
}

function formatlinks($suggestions) {
    // Formats an array of suggested article names as wiki markup.
    //   $suggestions - array of article names
    // Returns one string: a '== <BOTNAME> suggestions ==' heading followed
    // by one '* [[name]]' bullet line per suggestion, separated by newlines.
    // A string is returned (rather than an array) because the result is
    // concatenated onto existing page text by the caller; the original
    // applied [] to the heading string, which is a fatal error.
    $lines = array('== ' . BOTNAME . ' suggestions ==');
    if (is_array($suggestions)) {
        foreach ($suggestions as $line) {
            $lines[] = '* [[' . $line . ']]';
        }
    }
    return implode("\n", $lines);
}

// Throw sleep 300 (5 minutes) between calls to not add load to the wiki
function templaterun() {
    // Untested Link(b)ot implementation. One run:
    //   1. stops the script if checktemplate() reports a stop condition
    //   2. loads the article list from ALLTITLES
    //   3. collects the pages that link to the TRIGGER template
    //   4. for each such article: finds link suggestions in its text,
    //      swaps TRIGGER for POST in the text and posts the updated article
    //   5. appends the formatted suggestions to the article's talk page
    //      (or to Talk:<article>/links for 200 or more suggestions)
    // Returns nothing.
    // Untested Function
    $trigger = TRIGGER;
    $post = POST;
    $articlelog = ''; // fill with article + contributor pairs to log

    if (!checktemplate($trigger)) {
        exit('Terminating on "template modified" stop condition.');
    }

    $articlelist = filetoarray(ALLTITLES);
    if (!is_array($articlelist)) {
        $articlelist = array();
    }

    // gethtml() expects a page title, so the trigger is passed without a
    // '/wiki/' path prefix.
    $articletodo = parselinks(gethtml($trigger));
    if (!is_array($articletodo) || count($articletodo) < 1) {
        return; // nothing to do
    }

    foreach ($articletodo as $article) {
        // parse and post updated article
        $page = geteditpage($article);
        $attr = isset($page['attr']) ? $page['attr'] : array();
        $articletext = isset($page['text']) ? $page['text'] : '';

        $suggestions = discoverlinks($articletext, $articlelist);
        if (!is_array($suggestions)) {
            $suggestions = array();
        }
        $count = count($suggestions);

        // The swap has to operate on the article text, not on its title.
        $newtext = templateswap($articletext, $trigger, $post);
        $summary = newsummary($article, $count);
        posteditpage($article, $attr, $newtext, $summary);
        sleep(4); // sandbagged to stay under 6 posts per minute bot limit

        // format and post suggestions to article talk page
        $formatted = formatlinks($suggestions);
        if (is_array($formatted)) {
            $formatted = implode("\n", $formatted);
        }
        if ($count < 200) { // magic numbers
            $talk = 'Talk:' . $article;
        } else {
            $talk = 'Talk:' . $article . '/links';
        }
        $talkpage = geteditpage($talk);
        $talkattr = isset($talkpage['attr']) ? $talkpage['attr'] : array();
        $talktext = isset($talkpage['text']) ? rtrim($talkpage['text']) : '';
        // Keep a blank line between existing talk text and the new section
        // so the heading starts on its own line.
        $newtalk = ($talktext === '' ? '' : $talktext . "\n\n") . $formatted;
        $summary = newsummary($talk, $count);
        posteditpage($talk, $talkattr, $newtalk, $summary);
        sleep(4); // sandbagged to stay under 6 posts per minute bot limit
    }
    // post to log Heck WHERE is the log going to go?
    //
    // geteditpage(log)
    // $summary = newsummary(log)
    // posteditpage(log)
}

// Placeholder for a planned "local run" mode; not implemented yet.
// Renamed from 'future.localrun': a dot is not allowed in a PHP function
// name, and 'break' outside a loop is a compile error. Returns nothing.
function future_localrun() {
}
// Placeholder for a planned "form run" mode; not implemented yet.
// Renamed from 'future.formrun': a dot is not allowed in a PHP function
// name, and 'break' outside a loop is a compile error. Returns nothing.
function future_formrun() {
}
?>

If a function returns $a and $b but I only need $a, can I ignore $b without causing an error?

$a = function($c); (when the function returns both $a and $b)

Yes, like so:
/*
** @desc: function that returns two things (as an array)
*/ 
function X() {
   return array ("want this", "but not this");
}

// we only want the first value, so use this instead of "list ($a, $b)" - will not give error.
list ($a, ) = X();

// Should print out "want this".
print $a;
-- All the best, Nickj (t) 22:50, 28 Mar 2005 (UTC)