Skip to content

Commit 821466d

Browse files
committed
Command line options, exit on error and validate config on start
1 parent 5337ea8 commit 821466d

File tree

2 files changed

+126
-31
lines changed

2 files changed

+126
-31
lines changed

DCAT.php

Lines changed: 114 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -7,30 +7,95 @@
77
*
88
*/
99

10+
/**
11+
* Validate that config is json and contains all necessary keys
12+
* @param array $config the decoded config
13+
*/
14+
function validateConfig( $config ) {
15+
if ( !isset( $config ) ) {
16+
exit( "Could not read the config file. Are you sure it is valid json?" );
17+
}
18+
// Later tests depend on these existing and being defined
19+
$topBool = array( "api-enabled", "dumps-enabled" );
20+
foreach ( $topBool as $val ) {
21+
if ( !array_key_exists( $val, $config ) ) {
22+
exit( "$val is missing from the config file" );
23+
}
24+
elseif ( !is_bool( $config[$val] ) ) {
25+
exit( "$val in the config file must be a boolean" );
26+
}
27+
}
28+
29+
// Always required
30+
$top = array(
31+
"directory", "uri", "themes", "keywords", "publisher",
32+
"contactPoint", "ld-info", "catalog-license", "catalog-homepage",
33+
"catalog-i18n", "catalog-issued"
34+
);
35+
$sub = array(
36+
"publisher" => array( "publisherType", "homepage", "name", "email" ),
37+
"contactPoint" => array( "vcardType", "name", "email" ),
38+
"ld-info" => array( "accessURL", "mediatype", "license" )
39+
);
40+
41+
// Dependent on topBool
42+
if ( $config['api-enabled'] ) {
43+
array_push( $top, "api-info" );
44+
$sub["api-info"] = array( "accessURL", "mediatype", "license" );
45+
}
46+
if ( $config['dumps-enabled'] ) {
47+
array_push( $top, "dump-info" );
48+
$sub["dump-info"] = array( "accessURL", "mediatype", "license" );
49+
}
50+
51+
// Test
52+
foreach ( $top as $val ) {
53+
if ( !array_key_exists( $val, $config ) ) {
54+
exit( "$val is missing from the config file" );
55+
}
56+
}
57+
foreach ( $sub as $key => $subArray ) {
58+
foreach ( $subArray as $val ) {
59+
if ( !array_key_exists( $val, $config[$key] ) ) {
60+
exit( $key . "[" . $val . "] is missing from the config file" );
61+
}
62+
}
63+
}
64+
}
65+
1066
/**
1167
* Construct a data blob as an easy way of passing data around.
68+
* @param string $config path to the config file
1269
* @return array: A data blob
1370
*/
14-
function makeDataBlob() {
71+
function makeDataBlob( $config ) {
1572
// Open config file and languages
16-
$config = json_decode( file_get_contents( 'config.json' ), true );
73+
$config = json_decode( file_get_contents( $config ), true );
74+
validateConfig( $config );
1775

18-
// identify existant i18n files
19-
$langs = array ();
76+
// identify existing i18n files
77+
$langs = array();
2078
foreach ( scandir( 'i18n' ) as $key => $filename ) {
2179
if ( substr( $filename, -strlen( '.json' ) ) === '.json' && $filename !== 'qqq.json' ) {
2280
$langs[substr( $filename, 0, -strlen( '.json' ) )] = "i18n/$filename";
2381
}
2482
}
2583

2684
// load i18n files into i18n object
27-
$i18n = array ();
85+
$i18n = array();
2886
foreach ( $langs as $langCode => $filename ) {
2987
$i18n[$langCode] = json_decode( file_get_contents( $filename ), true );
3088
}
3189

3290
// load catalog i18n info from URL and add to i18n object
3391
$i18nJSON = json_decode( file_get_contents( $config['catalog-i18n'] ), true );
92+
if ( !isset( $i18nJSON ) ) {
93+
exit(
94+
"Could not read catalog-i18n. Are you sure " .
95+
$config['catalog-i18n'] .
96+
" exists and is valid json?"
97+
);
98+
}
3499
foreach ( array_keys( $i18n ) as $langCode ) {
35100
if ( array_key_exists( "$langCode-title", $i18nJSON ) ) {
36101
$i18n[$langCode]['catalog-title'] = $i18nJSON["$langCode-title"];
@@ -42,7 +107,7 @@ function makeDataBlob() {
42107

43108
// hardcoded ids (for now at least)
44109
// issue #2
45-
$ids = array (
110+
$ids = array(
46111
'publisher' => '_n42',
47112
'contactPoint' => '_n43',
48113
'liveDataset' => 'liveData',
@@ -53,7 +118,7 @@ function makeDataBlob() {
53118
);
54119

55120
// stick loaded data into blob
56-
$data = array (
121+
$data = array(
57122
'config' => $config,
58123
'dumps' => null,
59124
'i18n' => $i18n,
@@ -107,7 +172,7 @@ function dumpDistributionExtras( XMLWriter $xml, $data, $dumpDate, $format ) {
107172
* @param string $dumpDate the date of the dumpfile, null for live data
108173
*/
109174
function writeDistribution( XMLWriter $xml, $data, $distribId, $prefix, $dumpDate ) {
110-
$ids = array ();
175+
$ids = array();
111176

112177
foreach ( $data['config']["$prefix-info"]['mediatype'] as $format => $mediatype ) {
113178
$id = $data['config']['uri'] . '#' . $distribId . $dumpDate . $format;
@@ -340,7 +405,7 @@ function writeCatalog( XMLWriter $xml, $data, $publisher, $dataset ) {
340405
$xml->endElement();
341406

342407
$xml->writeElementNS( 'foaf', 'homepage', null,
343-
'https://www.wikidata.org' );
408+
$data['config']['catalog-homepage'] );
344409
$xml->writeElementNS( 'dcterms', 'modified', null, date( 'Y-m-d' ) );
345410
$xml->writeElementNS( 'dcterms', 'issued', null,
346411
$data['config']['catalog-issued'] );
@@ -416,7 +481,7 @@ function outputXml( $data ) {
416481
writePublisher( $xml, $data, $data['ids']['publisher'] );
417482
writeContactPoint( $xml, $data, $data['ids']['contactPoint'] );
418483

419-
$dataset = array ();
484+
$dataset = array();
420485

421486
// Live dataset and distributions
422487
$liveDistribs = writeDistribution( $xml, $data,
@@ -468,26 +533,30 @@ function scanDump( $dirname, $data ) {
468533
$teststrings[$fileEnding] = 'all.' . $fileEnding . '.gz';
469534
}
470535

471-
$dumps = array ();
536+
$dumps = array();
472537

473538
foreach ( scandir( $dirname ) as $dirKey => $subdir ) {
474539
// get rid of files and non-relevant sub-directories
475540
if ( substr( $subdir, 0, 1 ) != '.' && is_dir( $dirname . '/' . $subdir ) ) {
476541
// each subdir refers to a timestamp
477-
$dumps[$subdir] = array();
542+
$subDump = array();
478543
foreach ( scandir( $dirname . '/' . $subdir ) as $key => $filename ) {
479544
// match each file against an expected teststring
480545
foreach ( $teststrings as $fileEnding => $teststring ) {
481546
if ( substr( $filename, -strlen( $teststring ) ) === $teststring ) {
482547
$info = stat( "$dirname/$subdir/$filename" );
483-
$dumps[$subdir][$fileEnding] = array(
548+
$subDump[$fileEnding] = array(
484549
'timestamp' => gmdate( 'Y-m-d', $info['mtime'] ),
485550
'byteSize' => $info['size'],
486551
'filename' => $filename
487552
);
488553
}
489554
}
490555
}
556+
// if files found then add to dumps
557+
if ( count( $subDump ) > 0 ) {
558+
$dumps[$subdir] = $subDump;
559+
}
491560
}
492561
}
493562

@@ -498,29 +567,48 @@ function scanDump( $dirname, $data ) {
498567
* Scan dump directory for dump files (if any) and
499568
* create dcatap.rdf in the same directory
500569
*
501-
* @param string $directory directory name, overrides config setting if provided
570+
* @param array $options command line options to override defaults
502571
*/
503-
function run( $directory = null ) {
572+
function run( $options ) {
504573
// Load config variables and i18n into a data blob
505-
$data = makeDataBlob();
506-
507-
// Load directory from config, unless overridden
508-
if ( is_null( $directory ) ) {
509-
$directory = $data['config']['directory'];
574+
if ( !isset( $options['config'] ) ) {
575+
$options['config'] = 'config.json';
510576
}
577+
if ( !is_file( $options['config'] ) ) {
578+
exit( $options['config'] . " does not seem to exist" );
579+
}
580+
$data = makeDataBlob( $options['config'] );
511581

512-
// test if dir exists
513-
if ( !is_dir( $directory ) ) {
514-
echo "$directory is not a valid directory";
515-
return;
582+
// Load directories from config/options and test for existence
583+
if ( !isset( $options['dumpDir'] ) ) {
584+
$options['dumpDir'] = $data['config']['directory'];
585+
}
586+
if ( !is_dir( $options['dumpDir'] ) ) {
587+
exit( $options['dumpDir'] . " is not a valid directory" );
588+
}
589+
if ( !isset( $options['outputDir'] ) ) {
590+
$options['outputDir'] = $data['config']['directory'];
591+
}
592+
if ( !is_dir( $options['outputDir'] ) ) {
593+
exit( $options['outputDir'] . " is not a valid directory" );
516594
}
517595

518596
// add dump data to data blob
519-
$data['dumps'] = scanDump( $directory, $data );
597+
$data['dumps'] = scanDump( $options['dumpDir'], $data );
520598

521599
// create xml string from data blob
522600
$xml = outputXml( $data );
523601

524-
file_put_contents( "$directory/dcatap.rdf", $xml );
602+
file_put_contents( $options['outputDir'] . "/dcatap.rdf", $xml );
525603
}
604+
605+
// run from command-line with options
606+
// Load options
607+
$longopts = array(
608+
"config::", // Path to the config.json, default: config.json
609+
"dumpDir::", // Path to the directory containing entity dumps, default: set in config
610+
"outputDir::" // Path where dcatap.rdf should be written, default: set in config
611+
);
612+
$options = getopt( '', $longopts );
613+
run( $options );
526614
?>

README.md

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,18 @@ Current result can be found at [lokal-profil / dcat-wikidata.rdf](https://gist.g
2222
2. Copy `catalog.example.json` to a suitable place (e.g. on-wiki) and
2323
update the translations to fit your wikibase installation. Set this
2424
value as `catalog-i18n` in the config file.
25-
3. Create the dcatap.rdf file by running `php -r "require 'DCAT.php'; run('<PATH>');"`
26-
where `<PATH>` is the relative path to the directory containing the
27-
dumps (if any) and where the dcatap.rdf file should be created.
28-
`<PATH>` can be left out if already supplied through the `directory`
29-
parameter in the config file.
25+
3. Create the dcatap.rdf file by running `php DCAT.php` or
26+
`php DCAT.php --config="<path_1>" --dumpDir="<path_2>" --outputDir="<path_3>"`
27+
where each of the options is optional and can be left out.
28+
The options are:
29+
1. `--config` is the relative path to the json file containing the
30+
configurations, defaults to `./config.json`
31+
2. `--dumpDir` is the relative path to the directory containing the
32+
dumps (if any), defaults to the `directory` parameter in the
33+
config file
34+
3. `--outputDir` is the relative path to the directory where the
35+
`dcatap.rdf` file should be created, defaults to the `directory`
36+
parameter in the config file
3037

3138

3239
## Translations

0 commit comments

Comments
 (0)