77 *
88 */
99
/**
 * Validate that the decoded config is an array containing all necessary keys.
 *
 * On the first problem found the script is terminated through exit() with a
 * message describing what is wrong; on success the function simply returns.
 *
 * @param array|null $config decoded contents of the config file
 *   (the result of json_decode() with the assoc flag set)
 */
function validateConfig( $config ) {
	// json_decode() gives null for unparsable input and a scalar for json
	// which is valid but not an object; neither can be searched for keys.
	if ( !is_array( $config ) ) {
		exit( "Could not read the config file. Are you sure it is valid json?" );
	}

	// Later tests depend on these existing and being booleans
	$topBool = array( "api-enabled", "dumps-enabled" );
	foreach ( $topBool as $val ) {
		if ( !array_key_exists( $val, $config ) ) {
			exit( "$val is missing from the config file" );
		} elseif ( !is_bool( $config[$val] ) ) {
			exit( "$val in the config file must be a boolean" );
		}
	}

	// Every distribution description (ld/api/dump) needs the same sub-keys
	$infoKeys = array( "accessURL", "mediatype", "license" );

	// Always required
	$top = array(
		"directory", "uri", "themes", "keywords", "publisher",
		"contactPoint", "ld-info", "catalog-license", "catalog-homepage",
		"catalog-i18n", "catalog-issued"
	);
	$sub = array(
		"publisher" => array( "publisherType", "homepage", "name", "email" ),
		"contactPoint" => array( "vcardType", "name", "email" ),
		"ld-info" => $infoKeys
	);

	// Dependent on topBool
	if ( $config['api-enabled'] ) {
		$top[] = "api-info";
		$sub["api-info"] = $infoKeys;
	}
	if ( $config['dumps-enabled'] ) {
		$top[] = "dump-info";
		$sub["dump-info"] = $infoKeys;
	}

	// Test top-level keys
	foreach ( $top as $val ) {
		if ( !array_key_exists( $val, $config ) ) {
			exit( "$val is missing from the config file" );
		}
	}

	// Test sub-keys; every key of $sub is also in $top, so it exists here
	foreach ( $sub as $key => $subArray ) {
		// A scalar where an object is expected cannot be searched for keys
		if ( !is_array( $config[$key] ) ) {
			exit( "$key in the config file must be an object" );
		}
		foreach ( $subArray as $val ) {
			if ( !array_key_exists( $val, $config[$key] ) ) {
				exit( $key . "[" . $val . "] is missing from the config file" );
			}
		}
	}
}
65+
1066/**
1167 * Construct a data blob as an easy way of passing data around.
68+ * @param string: path to config file
1269 * @return array: A data blob
1370 */
14- function makeDataBlob () {
71+ function makeDataBlob ( $ config ) {
1572 // Open config file and languages
16- $ config = json_decode ( file_get_contents ( 'config.json ' ), true );
73+ $ config = json_decode ( file_get_contents ( $ config ), true );
74+ validateConfig ( $ config );
1775
18- // identify existant i18n files
19- $ langs = array ();
76+ // identify existing i18n files
77+ $ langs = array ();
2078 foreach ( scandir ( 'i18n ' ) as $ key => $ filename ) {
2179 if ( substr ( $ filename , -strlen ( '.json ' ) ) === '.json ' && $ filename !== 'qqq.json ' ) {
2280 $ langs [substr ( $ filename , 0 , -strlen ( '.json ' ) )] = "i18n/ $ filename " ;
2381 }
2482 }
2583
2684 // load i18n files into i18n object
27- $ i18n = array ();
85+ $ i18n = array ();
2886 foreach ( $ langs as $ langCode => $ filename ) {
2987 $ i18n [$ langCode ] = json_decode ( file_get_contents ( $ filename ), true );
3088 }
3189
3290 // load catalog i18n info from URL and add to i18n object
3391 $ i18nJSON = json_decode ( file_get_contents ( $ config ['catalog-i18n ' ] ), true );
92+ if ( !isset ( $ i18nJSON ) ) {
93+ exit (
94+ "Could not read catalog-i18n. Are you sure " .
95+ $ config ['catalog-i18n ' ] .
96+ " exists and is valid json? "
97+ );
98+ }
3499 foreach ( array_keys ( $ i18n ) as $ langCode ) {
35100 if ( array_key_exists ( "$ langCode-title " , $ i18nJSON ) ) {
36101 $ i18n [$ langCode ]['catalog-title ' ] = $ i18nJSON ["$ langCode-title " ];
@@ -42,7 +107,7 @@ function makeDataBlob() {
42107
43108 // hardcoded ids (for now at least)
44109 // issue #2
45- $ ids = array (
110+ $ ids = array (
46111 'publisher ' => '_n42 ' ,
47112 'contactPoint ' => '_n43 ' ,
48113 'liveDataset ' => 'liveData ' ,
@@ -53,7 +118,7 @@ function makeDataBlob() {
53118 );
54119
55120 // stick loaded data into blob
56- $ data = array (
121+ $ data = array (
57122 'config ' => $ config ,
58123 'dumps ' => null ,
59124 'i18n ' => $ i18n ,
@@ -107,7 +172,7 @@ function dumpDistributionExtras( XMLWriter $xml, $data, $dumpDate, $format ) {
107172 * @param string $dumpDate the date of the dumpfile, null for live data
108173 */
109174function writeDistribution ( XMLWriter $ xml , $ data , $ distribId , $ prefix , $ dumpDate ) {
110- $ ids = array ();
175+ $ ids = array ();
111176
112177 foreach ( $ data ['config ' ]["$ prefix-info " ]['mediatype ' ] as $ format => $ mediatype ) {
113178 $ id = $ data ['config ' ]['uri ' ] . '# ' . $ distribId . $ dumpDate . $ format ;
@@ -340,7 +405,7 @@ function writeCatalog( XMLWriter $xml, $data, $publisher, $dataset ) {
340405 $ xml ->endElement ();
341406
342407 $ xml ->writeElementNS ( 'foaf ' , 'homepage ' , null ,
343- ' https://www.wikidata.org ' );
408+ $ data [ ' config ' ][ ' catalog-homepage ' ] );
344409 $ xml ->writeElementNS ( 'dcterms ' , 'modified ' , null , date ( 'Y-m-d ' ) );
345410 $ xml ->writeElementNS ( 'dcterms ' , 'issued ' , null ,
346411 $ data ['config ' ]['catalog-issued ' ] );
@@ -416,7 +481,7 @@ function outputXml( $data ) {
416481 writePublisher ( $ xml , $ data , $ data ['ids ' ]['publisher ' ] );
417482 writeContactPoint ( $ xml , $ data , $ data ['ids ' ]['contactPoint ' ] );
418483
419- $ dataset = array ();
484+ $ dataset = array ();
420485
421486 // Live dataset and distributions
422487 $ liveDistribs = writeDistribution ( $ xml , $ data ,
@@ -468,26 +533,30 @@ function scanDump( $dirname, $data ) {
468533 $ teststrings [$ fileEnding ] = 'all. ' . $ fileEnding . '.gz ' ;
469534 }
470535
471- $ dumps = array ();
536+ $ dumps = array ();
472537
473538 foreach ( scandir ( $ dirname ) as $ dirKey => $ subdir ) {
474539 // get rid of files and non-relevant sub-directories
475540 if ( substr ( $ subdir , 0 , 1 ) != '. ' && is_dir ( $ dirname . '/ ' . $ subdir ) ) {
476541 // each subdir refers to a timestamp
477- $ dumps [ $ subdir ] = array ();
542+ $ subDump = array ();
478543 foreach ( scandir ( $ dirname . '/ ' . $ subdir ) as $ key => $ filename ) {
479544 // match each file against an expected teststring
480545 foreach ( $ teststrings as $ fileEnding => $ teststring ) {
481546 if ( substr ( $ filename , -strlen ( $ teststring ) ) === $ teststring ) {
482547 $ info = stat ( "$ dirname/ $ subdir/ $ filename " );
483- $ dumps [ $ subdir ] [$ fileEnding ] = array (
548+ $ subDump [$ fileEnding ] = array (
484549 'timestamp ' => gmdate ( 'Y-m-d ' , $ info ['mtime ' ] ),
485550 'byteSize ' => $ info ['size ' ],
486551 'filename ' => $ filename
487552 );
488553 }
489554 }
490555 }
556+ // if files found then add to dumps
557+ if ( count ( $ subDump ) > 0 ) {
558+ $ dumps [$ subdir ] = $ subDump ;
559+ }
491560 }
492561 }
493562
@@ -498,29 +567,48 @@ function scanDump( $dirname, $data ) {
498567 * Scan dump directory for dump files (if any) and
499568 * create dcatap.rdf in the same directory
500569 *
501- * @param string $directory directory name, overrides config setting if provided
570+ * @param array command line options to override defaults
502571 */
function run( $options = array() ) {
	// $options holds the command line overrides; recognised keys are
	// 'config' (path to the config file), 'dumpDir' (directory scanned
	// for dumps) and 'outputDir' (directory dcatap.rdf is written to).
	// Any problem terminates the script through exit() with a message.

	// Locate the config file, falling back to config.json
	if ( !isset( $options['config'] ) ) {
		$options['config'] = 'config.json';
	}
	if ( !is_file( $options['config'] ) ) {
		exit( $options['config'] . " does not seem to exist" );
	}

	// Load config variables and i18n into a data blob
	$data = makeDataBlob( $options['config'] );

	// Load directories from config/options and test for existence
	if ( !isset( $options['dumpDir'] ) ) {
		$options['dumpDir'] = $data['config']['directory'];
	}
	if ( !is_dir( $options['dumpDir'] ) ) {
		exit( $options['dumpDir'] . " is not a valid directory" );
	}
	// The output lands next to the dumps unless told otherwise, so that
	// overriding only dumpDir keeps writing into that directory.
	if ( !isset( $options['outputDir'] ) ) {
		$options['outputDir'] = $options['dumpDir'];
	}
	if ( !is_dir( $options['outputDir'] ) ) {
		exit( $options['outputDir'] . " is not a valid directory" );
	}

	// add dump data to data blob
	$data['dumps'] = scanDump( $options['dumpDir'], $data );

	// create xml string from data blob
	$xml = outputXml( $data );

	file_put_contents( $options['outputDir'] . "/dcatap.rdf", $xml );
}
604+
// run from command-line with options
// Load options. A single colon marks the value as required, which lets
// getopt() accept both "--config=path" and "--config path"; with an
// optional value ("::") the space separated form is not recognised.
$longopts = array(
	"config:", // Path to the config.json, default: config.json
	"dumpDir:", // Path to the directory containing entity dumps, default: set in config
	"outputDir:" // Path where dcatap.rdf should be outputted, default: same as dumpDir
);
$options = getopt( '', $longopts );
// getopt() reports failure with false; treat that as "no options given"
if ( !is_array( $options ) ) {
	$options = array();
}
run( $options );
526614?>
0 commit comments