From 18d977dc19278407d9912eda4bf1630cb3ccfaac Mon Sep 17 00:00:00 2001 From: Erik Bernhardson Date: Tue, 8 Jul 2025 10:19:06 -0700 Subject: [PATCH] search: Update rdf-spark-tools to 0.3.158 Updated jar propagates the new api_user_agent column during sparql query processing. Bug: T396002 --- search/config/artifacts.yaml | 6 +++--- search/config/dag_config.py | 2 +- ...gs_import_ttl.py-import_commons_ttl-gen_rev_map.expected | 2 +- ...gs_import_ttl.py-import_commons_ttl-munge_dumps.expected | 2 +- ...mport_ttl.py-import_wikidata_ttl-export_n3_full.expected | 2 +- ...mport_ttl.py-import_wikidata_ttl-export_n3_main.expected | 2 +- ..._ttl.py-import_wikidata_ttl-export_n3_scholarly.expected | 2 +- ...s_import_ttl.py-import_wikidata_ttl-gen_rev_map.expected | 2 +- ...s_import_ttl.py-import_wikidata_ttl-munge_dumps.expected | 2 +- ...port_ttl.py-import_wikidata_ttl-split_subgraphs.expected | 2 +- ....py-process_sparql_query_hourly-extract_queries.expected | 2 +- ...qs_streaming_updater_reconcile_hourly-reconcile.expected | 2 +- ...qs_streaming_updater_reconcile_hourly-reconcile.expected | 2 +- ...apping.py-subgraph_mapping_weekly-map_subgraphs.expected | 2 +- ...graph_query_mapping_daily-map_subgraphs_queries.expected | 2 +- ...etrics_weekly-extract_detailed_subgraph_metrics.expected | 2 +- ...metrics_weekly-extract_general_subgraph_metrics.expected | 2 +- ...trics_daily-extract_subgraph_pair_query_metrics.expected | 2 +- ...ry_metrics_daily-extract_subgraph_query_metrics.expected | 2 +- 19 files changed, 21 insertions(+), 21 deletions(-) diff --git a/search/config/artifacts.yaml b/search/config/artifacts.yaml index d8da46187..c46b9143b 100644 --- a/search/config/artifacts.yaml +++ b/search/config/artifacts.yaml @@ -15,9 +15,9 @@ artifacts: sramirez-spark-infotheoretic-feature-selection-1.5.0.jar: id: org.wikimedia.search:sramirez-spark-infotheoretic-feature-selection:jar:1.5.0 source: wmf_archiva_releases - rdf-spark-tools-0.3.155-jar-with-dependencies.jar: - id: org.wikidata.query.rdf:rdf-spark-tools:jar:jar-with-dependencies:0.3.155 - source: wmf_archiva_releases + rdf-spark-tools-0.3.158-jar-with-dependencies.jar: + id: org.wikidata.query.rdf:rdf-spark-tools:jar:jar-with-dependencies:0.3.158 + source: wmf_gitlab_packages glent-0.3.6-jar-with-dependencies.jar: id: org.wikimedia.search:glent:jar:jar-with-dependencies:0.3.6 source: wmf_gitlab_packages diff --git a/search/config/dag_config.py b/search/config/dag_config.py index 238ba225a..c699c2141 100644 --- a/search/config/dag_config.py +++ b/search/config/dag_config.py @@ -77,7 +77,7 @@ eventgate_datacenters = wmf_props.get("eventgate_datacenters", ["eqiad", "codfw" refinery_local_directory = wmf_props.get('wikimedia_discovery_analytics_path', '/srv/deployment/analytics/refinery') # Path to the jar containing WDQS/WCQS spark jobs -wdqs_spark_tools = artifact('rdf-spark-tools-0.3.155-jar-with-dependencies.jar') +wdqs_spark_tools = artifact('rdf-spark-tools-0.3.158-jar-with-dependencies.jar') # data_interval_start date formatted as hive partition with year=/month=/day=/hour= YMDH_PARTITION = \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_commons_ttl-gen_rev_map.expected b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_commons_ttl-gen_rev_map.expected index e30a012f1..cad515608 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_commons_ttl-gen_rev_map.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_commons_ttl-gen_rev_map.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-table \ discovery.wikibase_rdf/date=20230212/wiki=commons \ --output-path \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_commons_ttl-munge_dumps.expected b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_commons_ttl-munge_dumps.expected index d9f88bdb9..69179b4dd 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_commons_ttl-munge_dumps.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_commons_ttl-munge_dumps.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-path \ hdfs://analytics-hadoop/wmf/data/raw/commons/dumps/mediainfo-ttl/20230212/commons-20230212-mediainfo.ttl.bz2 \ --output-table \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_full.expected b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_full.expected index 6b7070a26..c42d46f5b 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_full.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_full.expected @@ -71,7 +71,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-table-partition-spec \ discovery.wikibase_rdf/date=20230213/wiki=wikidata \ --output-hdfs-path \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_main.expected b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_main.expected index 1aea6de79..0576945da 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_main.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_main.expected @@ -71,7 +71,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-table-partition-spec \ discovery.wikibase_rdf_subgraphs/snapshot=20230213/wiki=wikidata/scope=wikidata_main \ --output-hdfs-path \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_scholarly.expected b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_scholarly.expected index 50bec1c9d..21e9530e8 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_scholarly.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-export_n3_scholarly.expected @@ -71,7 +71,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-table-partition-spec \ discovery.wikibase_rdf_subgraphs/snapshot=20230213/wiki=wikidata/scope=scholarly_articles \ --output-hdfs-path \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-gen_rev_map.expected b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-gen_rev_map.expected index 79d3c38b1..9a87cc55c 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-gen_rev_map.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-gen_rev_map.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-table \ discovery.wikibase_rdf/date=20230213/wiki=wikidata \ --output-path \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-munge_dumps.expected b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-munge_dumps.expected index e6e93fb1b..871d911a8 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-munge_dumps.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-munge_dumps.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-path \ hdfs://analytics-hadoop/wmf/data/raw/wikidata/dumps/all_ttl/20230213/wikidata-20230213-all-BETA.ttl.bz2,hdfs://analytics-hadoop/wmf/data/raw/wikidata/dumps/lexemes_ttl/20230210/wikidata-20230210-lexemes-BETA.ttl.bz2 \ --output-table \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-split_subgraphs.expected b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-split_subgraphs.expected index 281ce6498..1c2564703 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-split_subgraphs.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_import_ttl.py-import_wikidata_ttl-split_subgraphs.expected @@ -73,7 +73,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-table-partition-spec \ discovery.wikibase_rdf/date=20230213/wiki=wikidata \ --output-table-partition-spec \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_process_sparql_query.py-process_sparql_query_hourly-extract_queries.expected b/tests/search/fixtures/spark_skein_specs/search_dags_process_sparql_query.py-process_sparql_query_hourly-extract_queries.expected index a59173a86..8f54466e0 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_process_sparql_query.py-process_sparql_query_hourly-extract_queries.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_process_sparql_query.py-process_sparql_query_hourly-extract_queries.expected @@ -71,7 +71,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --input-table \ event.wdqs_external_sparql_query/year=2021/month=6/day=1/hour=1 \ --output-table \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_rdf_streaming_updater_reconcile.py-wcqs_streaming_updater_reconcile_hourly-reconcile.expected b/tests/search/fixtures/spark_skein_specs/search_dags_rdf_streaming_updater_reconcile.py-wcqs_streaming_updater_reconcile_hourly-reconcile.expected index ac54dda4f..926f62c50 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_rdf_streaming_updater_reconcile.py-wcqs_streaming_updater_reconcile_hourly-reconcile.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_rdf_streaming_updater_reconcile.py-wcqs_streaming_updater_reconcile_hourly-reconcile.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --domain \ commons.wikimedia.org \ --reconciliation-source \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_rdf_streaming_updater_reconcile.py-wdqs_streaming_updater_reconcile_hourly-reconcile.expected b/tests/search/fixtures/spark_skein_specs/search_dags_rdf_streaming_updater_reconcile.py-wdqs_streaming_updater_reconcile_hourly-reconcile.expected index 0b312db24..2284d8bd6 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_rdf_streaming_updater_reconcile.py-wdqs_streaming_updater_reconcile_hourly-reconcile.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_rdf_streaming_updater_reconcile.py-wdqs_streaming_updater_reconcile_hourly-reconcile.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --domain \ www.wikidata.org \ --reconciliation-source \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_mapping.py-subgraph_mapping_weekly-map_subgraphs.expected b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_mapping.py-subgraph_mapping_weekly-map_subgraphs.expected index a7f7f7da2..430582ee9 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_mapping.py-subgraph_mapping_weekly-map_subgraphs.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_mapping.py-subgraph_mapping_weekly-map_subgraphs.expected @@ -69,7 +69,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --wikidata-table \ discovery.wikibase_rdf/date=20220711/wiki=wikidata \ --all-subgraphs-table \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_mapping.py-subgraph_query_mapping_daily-map_subgraphs_queries.expected b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_mapping.py-subgraph_query_mapping_daily-map_subgraphs_queries.expected index 124850b0d..d52791d93 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_mapping.py-subgraph_query_mapping_daily-map_subgraphs_queries.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_mapping.py-subgraph_query_mapping_daily-map_subgraphs_queries.expected @@ -73,7 +73,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ --wikidata-table \ discovery.wikibase_rdf/date=20220620/wiki=wikidata \ --top-subgraph-items-table \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_metrics_weekly-extract_detailed_subgraph_metrics.expected b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_metrics_weekly-extract_detailed_subgraph_metrics.expected index d64a8c382..637f16d5c 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_metrics_weekly-extract_detailed_subgraph_metrics.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_metrics_weekly-extract_detailed_subgraph_metrics.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ detailed-subgraph-metrics \ --all-subgraphs-table \ discovery.all_subgraphs/snapshot=20220704/wiki=wikidata \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_metrics_weekly-extract_general_subgraph_metrics.expected b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_metrics_weekly-extract_general_subgraph_metrics.expected index aacae12e3..02870383f 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_metrics_weekly-extract_general_subgraph_metrics.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_metrics_weekly-extract_general_subgraph_metrics.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ general-subgraph-metrics \ --wikidata-triples-table \ discovery.wikibase_rdf/date=20220704/wiki=wikidata \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_query_metrics_daily-extract_subgraph_pair_query_metrics.expected b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_query_metrics_daily-extract_subgraph_pair_query_metrics.expected index 5cb084f59..d9d873631 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_query_metrics_daily-extract_subgraph_pair_query_metrics.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_query_metrics_daily-extract_subgraph_pair_query_metrics.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ subgraph-pair-query-metrics \ --subgraph-query-table \ discovery.subgraph_queries/year=2022/month=7/day=2/wiki=wikidata \ diff --git a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_query_metrics_daily-extract_subgraph_query_metrics.expected b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_query_metrics_daily-extract_subgraph_query_metrics.expected index 925a87c35..d2e812a0b 100644 --- a/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_query_metrics_daily-extract_subgraph_query_metrics.expected +++ b/tests/search/fixtures/spark_skein_specs/search_dags_subgraph_and_query_metrics.py-subgraph_query_metrics_daily-extract_subgraph_query_metrics.expected @@ -67,7 +67,7 @@ master: production \ --deploy-mode \ client \ - hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.155-jar-with-dependencies.jar \ + hdfs:///wmf/cache/artifacts/airflow/search/rdf-spark-tools-0.3.158-jar-with-dependencies.jar \ query-metrics \ --event-query-table \ event.wdqs_external_sparql_query/year=2022/month=7/day=2/graph_name=wikidata_full \ -- GitLab