diff --git a/search/config/artifacts.yaml b/search/config/artifacts.yaml index d8da46187fdd5f9c1c39092b47034f01a9b69722..c46b9143b7cfab09138a888eb6dfc605c2caa5ba 100644 --- a/search/config/artifacts.yaml +++ b/search/config/artifacts.yaml @@ -15,9 +15,9 @@ artifacts: sramirez-spark-infotheoretic-feature-selection-1.5.0.jar: id: org.wikimedia.search:sramirez-spark-infotheoretic-feature-selection:jar:1.5.0 source: wmf_archiva_releases - rdf-spark-tools-0.3.155-jar-with-dependencies.jar: - id: org.wikidata.query.rdf:rdf-spark-tools:jar:jar-with-dependencies:0.3.155 - source: wmf_archiva_releases + rdf-spark-tools-0.3.158-jar-with-dependencies.jar: + id: org.wikidata.query.rdf:rdf-spark-tools:jar:jar-with-dependencies:0.3.158 + source: wmf_gitlab_packages glent-0.3.6-jar-with-dependencies.jar: id: org.wikimedia.search:glent:jar:jar-with-dependencies:0.3.6 source: wmf_gitlab_packages diff --git a/search/config/dag_config.py b/search/config/dag_config.py index 238ba225a5cb3041e354c80e7e5554636aef87ef..c699c2141b5658684cbd215aebc578a9f8a9d3f1 100644 --- a/search/config/dag_config.py +++ b/search/config/dag_config.py @@ -77,7 +77,7 @@ eventgate_datacenters = wmf_props.get("eventgate_datacenters", ["eqiad", "codfw" refinery_local_directory = wmf_props.get('wikimedia_discovery_analytics_path', '/srv/deployment/analytics/refinery') # Path to the jar containing WDQS/WCQS spark jobs -wdqs_spark_tools = artifact('rdf-spark-tools-0.3.155-jar-with-dependencies.jar') +wdqs_spark_tools = artifact('rdf-spark-tools-0.3.158-jar-with-dependencies.jar') # data_interval_start date formatted as hive partition with year=/month=/day=/hour= YMDH_PARTITION = \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_commons_ttl-gen_rev_map.expected b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_commons_ttl-gen_rev_map.expected index e30a012f1915aab31e60a40f43894c2f655e9f43..cad515608e60ad05ce3d8231f55c4336bb494e3f 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_commons_ttl-gen_rev_map.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_commons_ttl-gen_rev_map.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-table \ discovery.wikibase_rdf/date=20230212/wiki=commons \ --output-path \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_commons_ttl-munge_dumps.expected b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_commons_ttl-munge_dumps.expected index d9f88bdb9e3b1cbaf5226402609c10e98d30f415..69179b4ddd3ede3f8da5155293d5c00dbe2b5c03 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_commons_ttl-munge_dumps.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_commons_ttl-munge_dumps.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-path \ hdfs://analytics-hadoop/wmf/data/raw/commons/dumps/mediainfo-ttl/20230212/commons-20230212-mediainfo.ttl.bz2 \ --output-table \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_full.expected b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_full.expected index 6b7070a26b04a61fb61601a65bc303b2bb293688..c42d46f5bbfd73fcb941c94f0d9b21487ba759a1 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_full.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_full.expected @@ -71,7 +71,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-table-partition-spec \ discovery.wikibase_rdf/date=20230213/wiki=wikidata \ --output-hdfs-path \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_main.expected b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_main.expected index 1aea6de79a14f94741f936bc9d50a8af387f9954..0576945da5ccef539f39984b340636f2ccda7d4c 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_main.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_main.expected @@ -71,7 +71,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-table-partition-spec \ discovery.wikibase_rdf_subgraphs/snapshot=20230213/wiki=wikidata/scope=wikidata_main \ --output-hdfs-path \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_scholarly.expected b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_scholarly.expected index 50bec1c9d2c76bd5a5397db8e4d120af9a426fa8..21e9530e801a5e8a2145134b4ddf14c29c2b37da 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_scholarly.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_scholarly.expected @@ -71,7 +71,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-table-partition-spec \ discovery.wikibase_rdf_subgraphs/snapshot=20230213/wiki=wikidata/scope=scholarly_articles \ --output-hdfs-path \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-gen_rev_map.expected b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-gen_rev_map.expected index 79d3c38b10d6f2dcf67154a075458fe711864b04..9a87cc55c30f577206e00dd264ddff2c9bc0c611 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-gen_rev_map.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-gen_rev_map.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-table \ discovery.wikibase_rdf/date=20230213/wiki=wikidata \ --output-path \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-munge_dumps.expected b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-munge_dumps.expected index e6e93fb1b6be0abfd489e2879461c0c2068c3722..871d911a8716be10e3f2f020cd5d038f33e9203d 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-munge_dumps.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-munge_dumps.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-path \ hdfs://analytics-hadoop/wmf/data/raw/wikidata/dumps/all_ttl/20230213/wikidata-20230213-all-BETA.ttl.bz2,hdfs://analytics-hadoop/wmf/data/raw/wikidata/dumps/lexemes_ttl/20230210/wikidata-20230210-lexemes-BETA.ttl.bz2 \ --output-table \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-split_subgraphs.expected b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-split_subgraphs.expected index 281ce64980b678e69214c1470bb2ca321634a4a8..1c2564703d8592889b922e2e7205bffdb6e327fd 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-split_subgraphs.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-split_subgraphs.expected @@ -73,7 +73,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-table-partition-spec \ discovery.wikibase_rdf/date=20230213/wiki=wikidata \ --output-table-partition-spec \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_process_sparql_query.py-process_sparql_query_hourly-extract_queries.expected b/tests/search/fixtures/spark_skein_specs/search_dags_process_sparql_query.py-process_sparql_query_hourly-extract_queries.expected index a59173a86816124abe7d4dc2dd8874a24bb990d5..8f54466e0e340d34779664152500d590f8c95b4f 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_process_sparql_query.py-process_sparql_query_hourly-extract_queries.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_process_sparql_query.py-process_sparql_query_hourly-extract_queries.expected @@ -71,7 +71,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-table \ event.wdqs_external_sparql_query/year=2021/month=6/day=1/hour=1 \ --output-table \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_rdf_streaming_updater_reconcile.py-wcqs_streaming_updater_reconcile_hourly-reconcile.expected b/tests/search/fixtures/spark_skein_specs/search_dags_rdf_streaming_updater_reconcile.py-wcqs_streaming_updater_reconcile_hourly-reconcile.expected index ac54dda4f97e5703df3571173c71062bb03fc9fa..926f62c507ff102ffb6e373dddcffdeb0d170f53 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_rdf_streaming_updater_reconcile.py-wcqs_streaming_updater_reconcile_hourly-reconcile.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_rdf_streaming_updater_reconcile.py-wcqs_streaming_updater_reconcile_hourly-reconcile.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --domain \ commons.wikimedia.org \ --reconciliation-source \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_rdf_streaming_updater_reconcile.py-wdqs_streaming_updater_reconcile_hourly-reconcile.expected b/tests/search/fixtures/spark_skein_specs/search_dags_rdf_streaming_updater_reconcile.py-wdqs_streaming_updater_reconcile_hourly-reconcile.expected index 0b312db247e8d76ad6b6df09891f7df706e987aa..2284d8bd6f412e62998bdf3a2a2d5b8d611b4ec9 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_rdf_streaming_updater_reconcile.py-wdqs_streaming_updater_reconcile_hourly-reconcile.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_rdf_streaming_updater_reconcile.py-wdqs_streaming_updater_reconcile_hourly-reconcile.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --domain \ www.wikidata.org \ --reconciliation-source \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_mapping.py-subgraph_mapping_weekly-map_subgraphs.expected b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_mapping.py-subgraph_mapping_weekly-map_subgraphs.expected index a7f7f7da2c537372df45959bb650841184916e89..430582ee94da40924eb8d180ef6e0419358b7101 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_mapping.py-subgraph_mapping_weekly-map_subgraphs.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_mapping.py-subgraph_mapping_weekly-map_subgraphs.expected @@ -69,7 +69,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --wikidata-table \ discovery.wikibase_rdf/date=20220711/wiki=wikidata \ --all-subgraphs-table \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_mapping.py-subgraph_query_mapping_daily-map_subgraphs_queries.expected b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_mapping.py-subgraph_query_mapping_daily-map_subgraphs_queries.expected index 124850b0d5e5365c74c88d3e72d7d5424203facb..d52791d93db272f1d1af0bf9690aeac9994bcd9e 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_mapping.py-subgraph_query_mapping_daily-map_subgraphs_queries.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_mapping.py-subgraph_query_mapping_daily-map_subgraphs_queries.expected @@ -73,7 +73,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --wikidata-table \ discovery.wikibase_rdf/date=20220620/wiki=wikidata \ --top-subgraph-items-table \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_metrics_weekly-extract_detailed_subgraph_metrics.expected b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_metrics_weekly-extract_detailed_subgraph_metrics.expected index d64a8c3822b4ec20d25ee0f6d425e10a0623e2a8..637f16d5c79274263a0fc877ce74862e27eb6bb1 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_metrics_weekly-extract_detailed_subgraph_metrics.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_metrics_weekly-extract_detailed_subgraph_metrics.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ detailed-subgraph-metrics \ --all-subgraphs-table \ discovery.all_subgraphs/snapshot=20220704/wiki=wikidata \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_metrics_weekly-extract_general_subgraph_metrics.expected b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_metrics_weekly-extract_general_subgraph_metrics.expected index aacae12e378942636c85c7c5249d3b17a5c662e9..02870383f51b2cbc5c6b61eb5f3ded5a37dc95dd 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_metrics_weekly-extract_general_subgraph_metrics.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_metrics_weekly-extract_general_subgraph_metrics.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ general-subgraph-metrics \ --wikidata-triples-table \ discovery.wikibase_rdf/date=20220704/wiki=wikidata \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_query_metrics_daily-extract_subgraph_pair_query_metrics.expected b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_query_metrics_daily-extract_subgraph_pair_query_metrics.expected index 5cb084f597522f9ab377821c234b2d3853df6ee3..d9d8736316322a1dc3cbfcd74a5ec649cc359766 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_query_metrics_daily-extract_subgraph_pair_query_metrics.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_query_metrics_daily-extract_subgraph_pair_query_metrics.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ subgraph-pair-query-metrics \ --subgraph-query-table \ discovery.subgraph_queries/year=2022/month=7/day=2/wiki=wikidata \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_query_metrics_daily-extract_subgraph_query_metrics.expected b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_query_metrics_daily-extract_subgraph_query_metrics.expected index 925a87c35d0687a9e6555a464d7be93a6a2f7d0f..d2e812a0baf390929bbe80736614cfe21379faec 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_query_metrics_daily-extract_subgraph_query_metrics.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_query_metrics_daily-extract_subgraph_query_metrics.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ query-metrics \ --event-query-table \ event.wdqs_external_sparql_query/year=2022/month=7/day=2/graph_name=wikidata_full \