Skip to content

Commit a36b0c3

Browse files
author
Daniel Kinzler
committed
deferred resolution of ids to titles
git-svn-id: https://svn.toolserver.org/svnroot/daniel/duesenstuff/trunk/gpClient@609 9f2c43bc-b3c0-43f4-b155-41619b16f219
1 parent ab37961 commit a36b0c3

File tree

2 files changed

+99
-61
lines changed

2 files changed

+99
-61
lines changed

php/gpMediaWiki.php

Lines changed: 85 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,20 @@ public function __construct( $glue, $table = "?", $id_field = "pageset_id", $nam
246246

247247
$this->table_id_obj = new gpMySQLTable( $this->table, $this->id_field );
248248
$this->table_id_obj->add_key_definition( "PRIMARY KEY (" . $this->id_field . ")" );
249+
250+
$this->has_titles = true; #TODO: port to python
251+
$this->has_category_names = true; #TODO: port to python
252+
}
253+
254+
public function set_has_titles( $has ) { #TODO: port to python
255+
$this->has_titles = $has;
256+
$this->has_category_names = $has;
257+
}
258+
259+
public function set_has_category_names( $has ) { #TODO: port to python
260+
$this->has_category_names = $has;
261+
262+
if (!$has) $this->has_titles = $has;
249263
}
250264

251265
public function set_expect_big( $big ) {
@@ -283,6 +297,18 @@ public function add_from_select( $select ) {
283297
return $this->query( $sql );
284298
}
285299

300+
public function add_ids_from_select( $select ) { #TODO: port to python
301+
$sql= "REPLACE INTO " . $this->table ." ";
302+
$sql .= "( ";
303+
$sql .= $this->id_field . " ) ";
304+
$sql .= $select;
305+
306+
$ok = $this->query( $sql );
307+
if ( $ok ) $this->set_has_titles( false ); #TODO: port to python
308+
309+
return $ok;
310+
}
311+
286312
#TODO: kill delete_where() from python, superseded by strip/retain
287313
#TODO: kill delete_using() from python, superseded by strip/retain
288314

@@ -307,6 +333,8 @@ public function resolve_ids( ) {
307333
$this->add_from_select( $sql );
308334

309335
$this->glue->drop_temp_table( $tmp );
336+
337+
$this->set_has_titles( true ); #TODO: port to python
310338
return true;
311339
}
312340

@@ -317,14 +345,28 @@ public function make_sink() {
317345

318346
public function make_id_sink() {
319347
$sink = $this->glue->make_sink( $this->table_id_obj, true );
348+
349+
$this->set_has_titles( false ); #TODO: port to python
320350
return $sink;
321351
}
322352

323353
public function make_id_source( $ns = null ) {
324354
return $this->make_source( $ns, true );
325355
}
356+
357+
public function assert_has_titles( $ns = null ) { #TODO: port this to python
358+
if ( $ns === NS_CATEGORY ) {
359+
if ( !$this->has_category_names ) throw new gpUsageException("page titles are not known, call resolve_ids() first");
360+
} else {
361+
if ( !$this->has_titles ) throw new gpUsageException("page titles are not known, call resolve_ids() first");
362+
}
363+
}
326364

327365
public function make_source( $ns = null, $ids_only = false ) {
366+
if ( !$ids_only ) { #TODO: port this check to python
367+
$this->assert_has_titles($ns);
368+
}
369+
328370
$t = $ids_only ? $this->table_id_obj : $this->table_obj;
329371

330372
if ( $ns !== null ) {
@@ -362,14 +404,19 @@ public function copy_ids_to_sink( $ns, $sink ) {
362404
return $this->glue->copy($src, $sink, "~");
363405
}
364406

365-
public function add_source( $src ) {
407+
public function add_source( $src ) { #requires titles in source
366408
$sink = $this->make_sink();
367409
return $this->glue->copy( $src, $sink, "+" );
368410
}
369411

370412
public function add_page_set( $set ) {
371-
$select = $set->get_table()->get_select();
372-
return $this->add_from_select( $select );
413+
if ( $set->has_titles ) {
414+
$select = $set->get_table()->get_select();
415+
return $this->add_from_select( $select );
416+
} else {
417+
$select = "SELECT " . $set->id_field . " FROM " . $set->get_table()->get_name();
418+
return $this->add_ids_from_select( $select );
419+
}
373420
}
374421

375422
public function subtract_page_set( $set ) {
@@ -472,6 +519,8 @@ public function size() { #TODO: port to python
472519
}
473520

474521
public function strip_namespace( $ns, $inverse = false ) {
522+
$this->assert_has_titles($ns);
523+
475524
$where = array( $this->namespace_field => $ns );
476525
return $this->strip( $where, null, $inverse );
477526
}
@@ -513,6 +562,10 @@ public function retain_transcluding( $title, $ns = null ) { #TODO: port to pytho
513562
}
514563

515564
public function strip_modified_since( $timestamp ) { #TODO: port to python!
565+
#TODO: normalize $timestamp
566+
567+
$this->assert_has_titles();
568+
516569
$where = 'rc_timestamp >= ' . $this->glue->quote_string($timestamp);
517570

518571
$join = array(
@@ -528,6 +581,8 @@ public function strip_modified_since( $timestamp ) { #TODO: port to python!
528581
public function retain_modified_since( $timestamp ) { #TODO: port to python!
529582
#TODO: normalize $timestamp
530583

584+
$this->assert_has_titles();
585+
531586
$join = 'LEFT JOIN ' . $this->glue->wiki_table('recentchanges');
532587
$join .= ' ON rc_namespace = ' . $this->namespace_field #literals not assumed: used as a field name, not a string value
533588
. ' AND rc_title = ' . $this->title_field #literals not assumed: used as a field name, not a string value
@@ -594,7 +649,9 @@ public function retain_newer( $timestamp ) { #TODO: port to python!
594649
return $this->strip_by_creation( $timestamp, '<=' );
595650
}
596651

597-
public function strip( $where, $join = null, $inverse = false ) { #TODO: port to python!
652+
public function strip( $where, $join = null, $inverse = false ) { #TODO: port to python!
653+
#WARNING: caller has to check for has_titles if required!
654+
598655
if ( $where ) $where = $this->glue->condition_sql($where, true, $inverse);
599656

600657
if ( is_array($join) ) {
@@ -656,10 +713,15 @@ public function add_page_id( $id ) {
656713
$sql .= $this->glue->as_list($values);
657714

658715
$this->query( $sql );
716+
717+
$this->set_has_titles(false); #TODO: port to python
659718
return true;
660719
}
661720

662-
public function expand_categories( $ns = null ) {
721+
public function expand_categories( ) { #TODO: port to python: no namespace filter here.
722+
//NOTE: we need category titles! we could resolve ids for categories only, maybe.
723+
$this->resolve_ids();
724+
663725
//NOTE: MySQL can't perform self-joins on temp tables, so we need to copy the category names to another temp table first.
664726
$t = new gpMySQLTable("?", "cat_title");
665727
$t->set_field_definition("cat_title", "VARCHAR(255) BINARY NOT NULL");
@@ -676,33 +738,22 @@ public function expand_categories( $ns = null ) {
676738
#$this->glue->dump_query("select * from ".$tmp->get_name());
677739

678740
// ----------------------------------------------------------
679-
$sql = "select P.page_id, P.page_namespace, P.page_title ";
680-
$sql .= " from " . $this->glue->wiki_table( "page" ) . " as P ";
681-
$sql .= " join " . $this->glue->wiki_table( "categorylinks" ) . " as X ";
682-
$sql .= " on X.cl_from = P.page_id ";
741+
$sql = "select X.cl_from as page_id ";
742+
$sql .= " from " . $this->glue->wiki_table( "categorylinks" ) . " as X ";
683743
$sql .= " join " . $tmp->get_name() . " as T ";
684744
$sql .= " on T.cat_title = X.cl_to ";
685745

686-
if ($ns !== null) {
687-
if ( is_array($ns) ) $sql .= " where P.page_namespace in " . $this->glue->as_list( $ns );
688-
else $sql .= " where P.page_namespace = " . (int)$ns;
689-
}
690-
691746
#$this->glue->dump_query($sql);
692-
$this->add_from_select( $sql );
747+
$this->add_ids_from_select( $sql );
693748

694749
#$this->glue->dump_query("select * from ".$this->table);
695750
$this->glue->drop_temp_table( $tmp );
751+
752+
$this->set_has_titles(false); #TODO: port to python
696753
return true;
697754
}
698755

699-
public function add_subcategories( $cat, $depth, $without = null, $without_depth = null ) {
700-
$this->add_subcategory_ids($cat, $depth, $without, $without_depth);
701-
$this->resolve_ids();
702-
return true;
703-
}
704-
705-
protected function add_subcategory_ids( $cat, $depth, $without = null, $without_depth = null ) {
756+
public function add_subcategories( $cat, $depth, $without = null, $without_depth = null ) { #TODO: port to python: ids only, no titles
706757
$id = $this->glue->get_page_id( NS_CATEGORY, $cat );
707758
if ( !$id ) return false;
708759

@@ -719,33 +770,38 @@ protected function add_subcategory_ids( $cat, $depth, $without = null, $without_
719770
}
720771

721772
$sink->close();
773+
774+
$this->set_has_titles(false); #TODO: port to python
722775
return true;
723776
}
724777

725-
public function add_pages_in( $cat, $ns, $depth ) {
778+
public function add_pages_in( $cat, $depth ) { #TODO: port to python: no ns filter here! adds ids only!
726779
if ( !$this->add_subcategories($cat, $depth) ) return false;
727780

728-
$this->expand_categories($ns);
781+
$this->expand_categories();
729782
return true;
730783
}
731784

732785
public function add_pages_transclusing( $tag, $ns = null ) {
733786
if ( $ns === null ) $ns = NS_TEMPLATE;
734787
$tag = $this->glue->get_db_key( $tag );
735788

736-
$sql = " SELECT page_id, page_namespace, page_title ";
737-
$sql .= " FROM " . $this->glue->wiki_table( "page" );
738-
$sql .= " JOIN " . $this->glue->wiki_table( "templatelinks" );
739-
$sql .= " ON tl_from = page_id ";
789+
$sql = " SELECT tl_from ";
790+
$sql .= " FROM " . $this->glue->wiki_table( "templatelinks" );
740791
$sql .= " WHERE tl_namespace = " . (int)$ns;
741792
$sql .= " AND tl_title = " . $this->glue->quote_string($tag);
742793

743-
return $this->add_from_select($sql);
794+
$ok = $this->add_ids_from_select($sql);
795+
796+
$this->set_has_titles(false); #TODO: port to python
797+
return $ok;
744798
}
745799

746800
public function clear() {
747801
$sql = "TRUNCATE " . $this->table;
748802
$this->query($sql);
803+
804+
$this->set_has_titles(true); #TODO: port to python
749805
return true;
750806
}
751807

php/test/gpMediaWiki.test.php

Lines changed: 14 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ public function testAddPagesIn() {
213213

214214
//-----------------------------------------------------------
215215
$set->clear();
216-
$ok = $set->add_pages_in("topics", null, 5);
216+
$ok = $set->add_pages_in("topics", 5);
217217
$this->assertTrue( $ok );
218218

219219
$a = $set->capture();
@@ -229,32 +229,14 @@ public function testAddPagesIn() {
229229

230230
//-----------------------------------------------------------
231231
$set->clear();
232-
$ok = $set->add_pages_in("topics", null, 5);
232+
$ok = $set->add_pages_in("topics", 5);
233233
$this->assertTrue( $ok );
234234

235235
$a = $set->capture( NS_MAIN );
236236
$this->assertEquals(array(array(1111, NS_MAIN, "Lager"),
237237
array(1112, NS_MAIN, "Pils"),
238238
array(1122, NS_MAIN, "Toe_Cheese")), $a );
239239

240-
//-----------------------------------------------------------
241-
$set->clear();
242-
$ok = $set->add_pages_in("Portals", NS_MAIN, 5);
243-
$this->assertTrue( $ok );
244-
245-
$a = $set->capture();
246-
$this->assertEquals(array(array(1, NS_MAIN, "Main_Page"),
247-
array(20, NS_CATEGORY, "Portals")), $a );
248-
249-
//-----------------------------------------------------------
250-
$set->clear();
251-
$ok = $set->add_pages_in("portals", array(NS_MAIN, NS_PROJECT), 5);
252-
$this->assertTrue( $ok );
253-
254-
$a = $set->capture( array(NS_MAIN, NS_PROJECT) );
255-
$this->assertEquals(array(array(1, NS_MAIN, "Main_Page"),
256-
array(2, NS_PROJECT, "Help_Out")), $a );
257-
258240
//-----------------------------------------------------------
259241
$set->dispose();
260242
}
@@ -269,7 +251,7 @@ public function testBufferedAddPagesIn() {
269251

270252
//-----------------------------------------------------------
271253
$set->clear();
272-
$ok = $set->add_pages_in("topics", null, 5);
254+
$ok = $set->add_pages_in("topics", 5);
273255
$this->assertTrue( $ok );
274256

275257
$a = $set->capture();
@@ -298,8 +280,8 @@ public function testSubtractPageSet() {
298280
$rset->create_table();
299281

300282
//-----------------------------------------------------------
301-
$ok = $set->add_pages_in("topics", null, 5);
302-
$ok = $rset->add_pages_in("Maintenance", null, 5);
283+
$ok = $set->add_pages_in("topics", 5);
284+
$ok = $rset->add_pages_in("Maintenance", 5);
303285

304286
$ok = $set->subtract_page_set( $rset );
305287
$this->assertTrue( $ok );
@@ -329,8 +311,8 @@ public function testRetainPageSet() {
329311
$rset->create_table();
330312

331313
//-----------------------------------------------------------
332-
$ok = $set->add_pages_in("topics", null, 5);
333-
$ok = $rset->add_pages_in("Maintenance", null, 5);
314+
$ok = $set->add_pages_in("topics", 5);
315+
$ok = $rset->add_pages_in("Maintenance", 5);
334316

335317
$ok = $set->retain_page_set( $rset );
336318
$this->assertTrue( $ok );
@@ -357,8 +339,8 @@ public function testAddPageSet() {
357339
$cheese->create_table();
358340

359341
//-----------------------------------------------------------
360-
$ok = $cheese->add_pages_in("Cheese", null, 5);
361-
$ok = $beer->add_pages_in("Beer", null, 5);
342+
$ok = $cheese->add_pages_in("Cheese", 5);
343+
$ok = $beer->add_pages_in("Beer", 5);
362344

363345
$ok = $cheese->add_page_set( $beer );
364346
$this->assertTrue( $ok );
@@ -385,7 +367,7 @@ public function testStrip() {
385367
$set = new gpPageSet($this->gp);
386368
$set->create_table();
387369

388-
$set->add_pages_in("topics", null, 5);
370+
$set->add_pages_in("topics", 5);
389371

390372
//-----------------------------------------------------------
391373
$set->strip( "page_namespace = " . NS_CATEGORY );
@@ -413,7 +395,7 @@ public function testStripNamespace() {
413395

414396
//-----------------------------------------------------------
415397
$set->clear();
416-
$set->add_pages_in("topics", null, 5);
398+
$set->add_pages_in("topics", 5);
417399
$set->strip_namespace( NS_CATEGORY );
418400

419401
$a = $set->capture();
@@ -425,7 +407,7 @@ public function testStripNamespace() {
425407

426408
//-----------------------------------------------------------
427409
$set->clear();
428-
$set->add_pages_in("Portals", null, 5);
410+
$set->add_pages_in("Portals", 5);
429411
$set->strip_namespace( array(NS_CATEGORY, NS_PROJECT) );
430412

431413
$a = $set->capture();
@@ -446,7 +428,7 @@ public function testRetainNamespace() {
446428

447429
//-----------------------------------------------------------
448430
$set->clear();
449-
$set->add_pages_in("topics", null, 5);
431+
$set->add_pages_in("topics", 5);
450432
$set->retain_namespace( array(NS_MAIN) );
451433

452434
$a = $set->capture();
@@ -458,7 +440,7 @@ public function testRetainNamespace() {
458440

459441
//-----------------------------------------------------------
460442
$set->clear();
461-
$set->add_pages_in("Portals", null, 5);
443+
$set->add_pages_in("Portals", 5);
462444
$set->retain_namespace( NS_MAIN );
463445

464446
$a = $set->capture();

0 commit comments

Comments
 (0)