@@ -246,6 +246,20 @@ public function __construct( $glue, $table = "?", $id_field = "pageset_id", $nam
246246
247247 $ this ->table_id_obj = new gpMySQLTable ( $ this ->table , $ this ->id_field );
248248 $ this ->table_id_obj ->add_key_definition ( "PRIMARY KEY ( " . $ this ->id_field . ") " );
249+
250+ $ this ->has_titles = true ; #TODO: port to python
251+ $ this ->has_category_names = true ; #TODO: port to python
252+ }
253+
# Record whether page titles are known for the entries of this set.
# The value is written to both has_titles and has_category_names, so after
# this call the two flags always agree. assert_has_titles() reads them.
#
# $has: value stored in both flags (stored as given, not cast).
public function set_has_titles( $has ) { #TODO: port to python
	# one chained assignment keeps both flags in lock-step
	$this->has_titles = $this->has_category_names = $has;
}
258+
# Record whether category names are known for the entries of this set.
# has_category_names always receives $has. has_titles is only touched when
# $has is falsy: without category names the full titles cannot be flagged as
# known either. A truthy $has leaves has_titles as it was.
#
# $has: value stored in has_category_names (stored as given, not cast).
public function set_has_category_names( $has ) { #TODO: port to python
	$this->has_category_names = $has;

	if ( $has ) return; # nothing to invalidate

	$this->has_titles = $has;
}
250264
251265 public function set_expect_big ( $ big ) {
@@ -283,6 +297,18 @@ public function add_from_select( $select ) {
283297 return $ this ->query ( $ sql );
284298 }
285299
# Add page ids produced by a SELECT statement to this set.
# Issues "REPLACE INTO <table> ( <id_field> ) <select>", i.e. only the id
# column is written. When the query reports success, the set is flagged as
# not having titles via set_has_titles(false).
#
# $select: SQL SELECT text, appended verbatim to the statement; it is not
#          escaped here, so it must not contain untrusted input.
# Returns whatever $this->query() returned.
public function add_ids_from_select( $select ) { #TODO: port to python
	$target = $this->table . " " . "( " . $this->id_field . " ) ";

	$result = $this->query( "REPLACE INTO " . $target . $select );
	if ( !$result ) return $result; # failed: leave the title flags alone

	$this->set_has_titles( false ); #TODO: port to python
	return $result;
}
311+
286312 #TODO: kill delete_where() from python, superseded by strip/retain
287313 #TODO: kill delete_using() from python, superseded by strip/retain
288314
@@ -307,6 +333,8 @@ public function resolve_ids( ) {
307333 $ this ->add_from_select ( $ sql );
308334
309335 $ this ->glue ->drop_temp_table ( $ tmp );
336+
337+ $ this ->set_has_titles ( true ); #TODO: port to python
310338 return true ;
311339 }
312340
@@ -317,14 +345,28 @@ public function make_sink() {
317345
# Create a sink on the id-only table definition (table_id_obj) via
# $this->glue->make_sink(), passing true as its second argument.
# The set is flagged as not having titles as soon as the sink exists,
# before the caller has written anything to it.
#
# Returns the sink object created by the glue.
public function make_id_sink() {
	$id_sink = $this->glue->make_sink( $this->table_id_obj, true );

	$this->set_has_titles( false ); #TODO: port to python

	return $id_sink;
}
322352
# Convenience wrapper: make_source() with its $ids_only argument set to true.
# With $ids_only set, make_source() skips its assert_has_titles() check.
#
# $ns: passed through to make_source() unchanged (default null).
# Returns whatever make_source() returns.
public function make_id_source( $ns = null ) {
	$ids_only = true;
	return $this->make_source( $ns, $ids_only );
}
356+
# Guard for operations that need title information.
# When $ns is exactly NS_CATEGORY (strict comparison) the has_category_names
# flag is consulted; for every other value, including null, has_titles is.
#
# $ns: namespace the caller is about to work on (default null).
# Throws gpUsageException when the consulted flag is falsy.
public function assert_has_titles( $ns = null ) { #TODO: port this to python
	# pick the flag that matters for the requested namespace
	$known = ( $ns === NS_CATEGORY ) ? $this->has_category_names : $this->has_titles;

	if ( $known ) return;

	throw new gpUsageException("page titles are not known, call resolve_ids() first ");
}
326364
327365 public function make_source ( $ ns = null , $ ids_only = false ) {
366+ if ( !$ ids_only ) { #TODO: port this check to python
367+ $ this ->assert_has_titles ($ ns );
368+ }
369+
328370 $ t = $ ids_only ? $ this ->table_id_obj : $ this ->table_obj ;
329371
330372 if ( $ ns !== null ) {
@@ -362,14 +404,19 @@ public function copy_ids_to_sink( $ns, $sink ) {
362404 return $ this ->glue ->copy ($ src , $ sink , "~ " );
363405 }
364406
# Copy everything from $src into this set.
# A sink is obtained from make_sink() and handed to $this->glue->copy()
# together with $src and the mode string "+ ".
#
# $src: source to read from; per the original note it must provide titles.
# Returns whatever $this->glue->copy() returns.
public function add_source( $src ) { #requires titles in source
	return $this->glue->copy( $src, $this->make_sink(), "+ " );
}
369411
# Add the contents of another page set to this one.
# If $set->has_titles is falsy, only the ids are copied: a SELECT of
# $set->id_field from $set's table is passed to add_ids_from_select().
# Otherwise the SELECT provided by $set->get_table()->get_select() is
# passed to add_from_select().
#
# $set: the page set to add; its has_titles and id_field properties and its
#       get_table() method are used.
# Returns the result of the add_*_from_select() call that was made.
public function add_page_set( $set ) {
	if ( !$set->has_titles ) {
		# source has ids only, so copy just the id column
		$ids_sql = "SELECT " . $set->id_field . " FROM " . $set->get_table()->get_name();
		return $this->add_ids_from_select( $ids_sql );
	}

	return $this->add_from_select( $set->get_table()->get_select() );
}
374421
375422 public function subtract_page_set ( $ set ) {
@@ -472,6 +519,8 @@ public function size() { #TODO: port to python
472519 }
473520
# Strip entries by namespace.
# Calls assert_has_titles($ns) first, so it throws if the required title
# information is not flagged as known. Then delegates to strip() with the
# condition array( namespace_field => $ns ), no join, and $inverse.
#
# $ns:      namespace value used in the condition.
# $inverse: passed through to strip() (default false).
# Returns whatever strip() returns.
public function strip_namespace( $ns, $inverse = false ) {
	$this->assert_has_titles( $ns );

	return $this->strip( array( $this->namespace_field => $ns ), null, $inverse );
}
@@ -513,6 +562,10 @@ public function retain_transcluding( $title, $ns = null ) { #TODO: port to pytho
513562 }
514563
515564 public function strip_modified_since ( $ timestamp ) { #TODO: port to python!
565+ #TODO: normalize $timestamp
566+
567+ $ this ->assert_has_titles ();
568+
516569 $ where = 'rc_timestamp >= ' . $ this ->glue ->quote_string ($ timestamp );
517570
518571 $ join = array (
@@ -528,6 +581,8 @@ public function strip_modified_since( $timestamp ) { #TODO: port to python!
528581 public function retain_modified_since ( $ timestamp ) { #TODO: port to python!
529582 #TODO: normalize $timestamp
530583
584+ $ this ->assert_has_titles ();
585+
531586 $ join = 'LEFT JOIN ' . $ this ->glue ->wiki_table ('recentchanges ' );
532587 $ join .= ' ON rc_namespace = ' . $ this ->namespace_field #literals not assumed: used as a field name, not a string value
533588 . ' AND rc_title = ' . $ this ->title_field #literals not assumed: used as a field name, not a string value
@@ -594,7 +649,9 @@ public function retain_newer( $timestamp ) { #TODO: port to python!
594649 return $ this ->strip_by_creation ( $ timestamp , '<= ' );
595650 }
596651
597- public function strip ( $ where , $ join = null , $ inverse = false ) { #TODO: port to python!
652+ public function strip ( $ where , $ join = null , $ inverse = false ) { #TODO: port to python!
653+ #WARNING: caller has to check for has_titles if required!
654+
598655 if ( $ where ) $ where = $ this ->glue ->condition_sql ($ where , true , $ inverse );
599656
600657 if ( is_array ($ join ) ) {
@@ -656,10 +713,15 @@ public function add_page_id( $id ) {
656713 $ sql .= $ this ->glue ->as_list ($ values );
657714
658715 $ this ->query ( $ sql );
716+
717+ $ this ->set_has_titles (false ); #TODO: port to python
659718 return true ;
660719 }
661720
662- public function expand_categories ( $ ns = null ) {
721+ public function expand_categories ( ) { #TODO: port to python: no namespace filter here.
722+ //NOTE: we need category titles! we could resolve ids for categories only, maybe.
723+ $ this ->resolve_ids ();
724+
663725 //NOTE: MySQL can't perform self-joins on temp tables. so we need to copy the category names to another temp table first.
664726 $ t = new gpMySQLTable ("? " , "cat_title " );
665727 $ t ->set_field_definition ("cat_title " , "VARCHAR(255) BINARY NOT NULL " );
@@ -676,33 +738,22 @@ public function expand_categories( $ns = null ) {
676738 #$this->glue->dump_query("select * from ".$tmp->get_name());
677739
678740 // ----------------------------------------------------------
679- $ sql = "select P.page_id, P.page_namespace, P.page_title " ;
680- $ sql .= " from " . $ this ->glue ->wiki_table ( "page " ) . " as P " ;
681- $ sql .= " join " . $ this ->glue ->wiki_table ( "categorylinks " ) . " as X " ;
682- $ sql .= " on X.cl_from = P.page_id " ;
741+ $ sql = "select X.cl_from as page_id " ;
742+ $ sql .= " from " . $ this ->glue ->wiki_table ( "categorylinks " ) . " as X " ;
683743 $ sql .= " join " . $ tmp ->get_name () . " as T " ;
684744 $ sql .= " on T.cat_title = X.cl_to " ;
685745
686- if ($ ns !== null ) {
687- if ( is_array ($ ns ) ) $ sql .= " where P.page_namespace in " . $ this ->glue ->as_list ( $ ns );
688- else $ sql .= " where P.page_namespace = " . (int )$ ns ;
689- }
690-
691746 #$this->glue->dump_query($sql);
692- $ this ->add_from_select ( $ sql );
747+ $ this ->add_ids_from_select ( $ sql );
693748
694749 #$this->glue->dump_query("select * from ".$this->table);
695750 $ this ->glue ->drop_temp_table ( $ tmp );
751+
752+ $ this ->set_has_titles (false ); #TODO: port to python
696753 return true ;
697754 }
698755
699- public function add_subcategories ( $ cat , $ depth , $ without = null , $ without_depth = null ) {
700- $ this ->add_subcategory_ids ($ cat , $ depth , $ without , $ without_depth );
701- $ this ->resolve_ids ();
702- return true ;
703- }
704-
705- protected function add_subcategory_ids ( $ cat , $ depth , $ without = null , $ without_depth = null ) {
756+ public function add_subcategories ( $ cat , $ depth , $ without = null , $ without_depth = null ) { #TODO: port to python: ids only, no titles
706757 $ id = $ this ->glue ->get_page_id ( NS_CATEGORY , $ cat );
707758 if ( !$ id ) return false ;
708759
@@ -719,33 +770,38 @@ protected function add_subcategory_ids( $cat, $depth, $without = null, $without_
719770 }
720771
721772 $ sink ->close ();
773+
774+ $ this ->set_has_titles (false ); #TODO: port to python
722775 return true ;
723776 }
724777
# Add the pages found in a category tree.
# First calls add_subcategories($cat, $depth); if that returns a falsy
# value, this method returns false without doing anything else. Otherwise
# expand_categories() is called and true is returned.
# There is no namespace filter here and only ids are added (see TODO note).
#
# $cat:   category, passed to add_subcategories().
# $depth: depth, passed to add_subcategories().
# Returns false if add_subcategories() failed, true otherwise.
public function add_pages_in( $cat, $depth ) { #TODO: port to python: no ns filter here! adds ids only!
	$found = $this->add_subcategories( $cat, $depth );
	if ( !$found ) return false;

	$this->expand_categories();
	return true;
}
731784
# Add the ids of all pages that transclude a given page.
# Selects tl_from from the wiki's templatelinks table where tl_namespace
# and tl_title match, and feeds that SELECT to add_ids_from_select().
# The set is flagged as not having titles afterwards, regardless of the
# result of the insert.
#
# $tag: title of the transcluded page; converted with glue->get_db_key()
#       and quoted with glue->quote_string() before use in the SQL.
# $ns:  namespace of the transcluded page; null means NS_TEMPLATE.
#       Cast to int before use in the SQL.
# Returns whatever add_ids_from_select() returned.
public function add_pages_transclusing( $tag, $ns = null ) {
	$ns = ( $ns === null ) ? NS_TEMPLATE : $ns;
	$tag = $this->glue->get_db_key( $tag );

	$sql = " SELECT tl_from "
		. " FROM " . $this->glue->wiki_table( "templatelinks " )
		. " WHERE tl_namespace = " . (int)$ns
		. " AND tl_title = " . $this->glue->quote_string( $tag );

	$result = $this->add_ids_from_select( $sql );

	$this->set_has_titles( false ); #TODO: port to python
	return $result;
}
745799
# Remove all entries from this set by issuing TRUNCATE on its table.
# Afterwards the set is flagged as having titles via set_has_titles(true).
# The result of the query is not inspected.
#
# Always returns true.
public function clear() {
	$this->query( "TRUNCATE " . $this->table );

	$this->set_has_titles( true ); #TODO: port to python

	return true;
}
751807
0 commit comments