Jump to content

User:Abshirdheere/Code1

Ka Wikipedia

# Python 2 script (it relies on the ``unicode`` builtin), originally published
# inside a wiki <source lang="python"> block.
#
# It asks the "enwiki.labsdb" database for every category (cl_to) that
# contains at least one page having a Somali ("so") interlanguage link, while
# no namespace-14 page with that title has a "so" interlanguage link itself
# (namespace 14 is MediaWiki's Category namespace).  The resulting titles are
# written to ``sowikicats.txt`` as a "* title" bullet list, one per line.
import codecs
import gzip
import re
import urllib

import MySQLdb as mysqldb

import catlib
import config
import pagegenerators
import wikipedia

site = wikipedia.getSite("en")

# NOTE(review): the string delimiters of this query were lost when the script
# was pasted into the wiki; the SQL tokens are unchanged, but the exact
# whitespace inside the literal is a reconstruction.
query = '''SELECT /* SLOW_OK */ cl_to
FROM categorylinks
WHERE
    cl_from IN
        (SELECT DISTINCT ll_from
         FROM langlinks
         WHERE ll_lang = "so")
    AND
    cl_to NOT IN
        (SELECT DISTINCT page_title
         FROM langlinks LEFT JOIN page
             ON page_id = ll_from
         WHERE ll_lang = "so"
             AND page_namespace = 14)
GROUP BY cl_to;'''

wikipedia.output(u'Executing query:\n%s' % query)

conn = mysqldb.connect("enwiki.labsdb", db=site.dbName(),
                       user=config.db_username,
                       passwd=config.db_password)
try:
    cursor = conn.cursor()
    try:
        # The query is sent in the site's encoding, as the original did.
        cursor.execute(query.encode(site.encoding()))
        results = cursor.fetchall()
    finally:
        cursor.close()
finally:
    # Release the database connection even if the query fails.
    conn.close()

# One bullet per category title; each title is decoded from UTF-8 so the
# codecs writer below receives unicode text.  An empty result set yields an
# empty string (and therefore an empty output file).
text = u'\n'.join(u'* ' + unicode(str(row[0]), 'UTF-8') for row in results)

savefile = 'sowikicats.txt'
# The ``with`` block closes the file; no explicit close() is needed.
with codecs.open(savefile, mode='w', encoding='utf8') as f:
    f.write(text)