Open Encyclopedia

Article Search:

User: UninvitedCompany/user script

From open-encyclopedia.com - the free encyclopedia.

< User:UninvitedCompany

Contents

1 pre.sql
2 post.sh
3 fmtlist.c

steps

  1. Download the split files for en.old from http://download.wikimedia.org. As the database grows there occasionally come to be more split files than there are links. Therefore, after downloading e.g. xaa, xab, xac, xad, xae through the links it also may be necessary to download e.g. xaf and xag.
  2. Compile wikitrunc.c with gcc
  3. $ cat xa? | bunzip2 | wikitrunc > trunc_old.sql
  4. $ mysql --database=test --user=administrator --password=
  5. At the sql prompt, @trunc_old.sql
  6. Allow a week or more for a typical pc.
  7. Run the pre.sql script (14 hours)
  8. Cut and paste Special:Listadmins to a local file named adminlist.txt.
  9. Edit the post.sql script to change minimums (if desired) and run it; you get a file called "list"
  10. Edit the fmtlist.c to change the cutoff date, compile with gcc
  11. fmtlist <list >list.wiki
  12. Paste to Wikipedia:Another list of Wikipedians in order of arrival

wikitrunc.c

 #include <stdio.h>
 myerror( char *str )
 {
   fprintf( stderr, "%s\n", str );
   exit( 1 );
 }
 main()
 {
   static char values[] = "VALUES ";
   int state = 0, c, i;
   while( state < 7 && ( c = getchar() ) != EOF ) {
       putchar( c );
       if( values[ state ] == c )
           state++;
       else
           state = 0;
   }
   while ( c != EOF ) {
       do {
           // want (
           if( ( c = getchar() ) != '(' )
               myerror( "format error, expecting (" );
           putchar( c );
           // want ... , ... , ... ,
           for( i=0; i<3; i++ )
               copyfield();
           truncfield();
           while( (c=copyfield()) != ')' && c != EOF )
               ;
           if( c != ')' )
               myerror( "expecting )" );
           c = getchar();      // ,
           putchar( c );
       } while( c == ',' );
       // ; INSERT INTO old VALUES (
       state = i = 0;
       while( state < 7 && ( c = getchar() ) != EOF ) {
           putchar( c );
           if( values[ state ] == c )
               state++;
           else {
               state = 0;
               if( i++ > 100 )
                   error( "garbage looking for INSERT INTO old VALUES" );
           }
       }
   }
   // must be some trailing stuff, what to do?
   //
   //
   fprintf( stderr, "last char was %c (%d)\n", c, c );
   for( i=100; i--; )
       fprintf( stderr, "%c", getchar() );


   return 0;
 }


 copyfield()
 {
   int c;
   if( (c=getchar()) == '\ ) {
       putchar( c );
       do {
           if( (c = getchar()) == '\\' ) {
               putchar( c );
               putchar( getchar() );
           } else {
               putchar( c );
           }
       } while( c != '\ );
   } else
       putchar( c );
   while( c != EOF && c != ',' && c != ')' ) {
       c = getchar();
       putchar( c );
   }
   return c;
 }
 truncfield()
 {
   int c;
   int maxcopy = 40;
   if( (c=getchar()) == '\ ) {
       putchar( c );
       do {
           if( (c = getchar()) == '\\' ) {
               if( maxcopy > 0 ) {
                   putchar( c );
                   putchar( getchar() );
               } else
                   getchar();
           } else {
               if( maxcopy > 0 || c == '\ ) {
                   putchar( c );
                   maxcopy--;
               }
           }
       } while( c != '\ );
   } else
       putchar( c );
   while( c != EOF && c != ',' && c != ')' ) {
       c = getchar();
       putchar( c );
   }
   return c;
 }

pre.sql

 drop table if exists edit_months;
 drop table if exists track_users;
 create table track_users as select count(old_user_text) as total_edits, old_user_text from old
 group by old_user_text having total_edits > 199;
 # takes 32 minutes


 delete from track_users where old_user_text like '%.%.%.%';
 delete from track_users where total_edits < 200;
 # leaves about 2500 users of interest
 create table edit_months
 as select count(floor( old_timestamp/100000000 )) as month_edits,
 floor( old_timestamp/100000000 ) as yyyymm,
 old.old_user_text as user_text, total_edits
 from old, track_users
 where old.old_user_text = track_users.old_user_text
 group by yyyymm, user_text
 having month_edits > 4;
  1. 14 hours 30 minutes

post.sh

 cat > adminadd.sql <<eof
 drop table if exists adminlist;
 create table adminlist ( user_text varchar(255) binary not null default  ) TYPE=InnoDB;
 insert into adminlist values
 eof
 sed -e "s/.*ser:/(\'/" adminlist.txt | sed -e "s/$/'),/" >>adminadd.sql
 cat >> adminadd.sql <<eof
 ('bogus last record')
 ;
 delete from adminlist where user_text = 'bogus last record';
 eof
 mysql --database=test --user=administrator --password= <<eof
 source adminadd.sql
 drop table if exists tenure;
 create table tenure
   as select user_text, min( yyyymm ) as yyyymm_first, max( yyyymm ) as yyyymm_last, count( yyyymm ) as months_active, total_edits
   from edit_months
   group by user_text, total_edits
   order by yyyymm_first, user_text;
 alter table tenure add ( admin varchar (10) default 'N' );
 update tenure, adminlist set tenure.admin = 'Y' where adminlist.user_text = tenure.user_text;
 eof

 mysql --database=test --user=administrator --password= >list <<eof
 select * from tenure where (total_edits > 1000 and months_active > 5) or admin='Y' order by yyyymm_first, user_text;
 eof
 rm adminadd.sql

fmtlist.c

 main()
 {
   static char *months[] = { "", "January", "February", "March", "April", "May",
       "June", "July", "August", "September", "October", "November", "December" };
   char buf[ 500 ];
   gets( buf );    // throw away header line
   char name[ 500 ], last[ 10 ], first[ 10 ], active[ 10 ], edits[ 10 ], *p, *q;
   int admin;
   char savefirst[ 10 ];
   savefirst[0] = '\0';
   printf( "== About this list ==\n"
   "This list was compiled using an automated script.\n"
   "A manually-maintained list that includes "
   "some interesting commentary but that does not include all users is also available.\n"
   "\n"
   "The purpose of this list is to document the contributions of early contributors,"
   "to draw attention to those prolific contributors who have left the project so that we may better "
   "retain contributors in the future, \n"
   "and to serve as a resource for those who wish to better understand social trends at Wikipedia.\n\n"
   "The list includes Wikipedians who made more than 1000 edits and were active "
   "for more than six months."
   "Administrators are included for comparison even if they do not meet the "
   "edit count and acvity criteria.\n"
   "Edit counts are shown in parenthesis.  Administrators are indicated with a *.\n"
   "Wikipedians are listed as inactive when they no longer make at least five edits per month.\n"
   "Anonymous contributors are not shown.\n"
   "\n"
   "The list is based on data as of October 10, 2004.  Very recent edits -- those marked (top) in a user's "
   "contribution list -- are not included because of technical limitations of the script.\n\n" );
   while( gets( buf ) ) {
       for( p=buf, q=name; *p && *p != '\t'; p++ )
           *q++ = *p;
       *q = '\0';
       p++;
       for( q=first; *p && *p != '\t'; p++ )
           *q++ = *p;
       *q = '\0';
       p++;
       for( q=last; *p && *p != '\t'; p++ )
           *q++ = *p;
       *q = '\0';
       p++;
       for( q=active; *p && *p != '\t'; p++ )
           *q++ = *p;
       *q = '\0';
       p++;
       for( q=edits; *p && *p != '\t'; p++ )
           *q++ = *p;
  
      *q = '\0';
       p++;
       admin = *p == 'Y';
       if( strcmp( first, savefirst ) != 0 ) {
           printf( "\n== %s %.4s ==\n",
                   months[ atoi(first+4) ], first );
           strcpy( savefirst, first );
       }
       if( admin )
           printf( "#. %s*", name, name );
       else
           printf( "#. %s", name, name );
       printf( " (%s", edits );
       printf( ")" );
       if( atoi( last ) < 200408 )
           printf( " - inactive since %s %.4s",
                   months[ atoi(last+4) ], last );
       printf( "\n" );
   }
 }

Contribute

Found an omission? You can freely contribute to this Wikipedia article. Edit 'User: UninvitedCompany/user script' article.

Last Contributor: UninvitedCompany - Article Talk Page: Discussion - GNU FDL: Verbatim Source

About Open Encyclopedia

Open Encyclopedia is an free extensive encyclopedia service provided by the New Frontier Information Network, a newly launched private company which offers easy access to thousands of online articles, e-books and documentation covering a wide variety of broad topics.


This is a minimal rendered version of a open-encyclopedia.com Web page. Our Web site is best viewed using an up-to-date Web browser, such as Mozilla Firefox, Opera or Microsoft Internet Explorer.

Copyright © 2003-2004 Zeeshan Muhammad. All rights reserved. Legal notices. Part of the New Frontier Information Network.