Skip to content

Commit 014c1b8

Browse files
authored
Merge pull request #10 from yutannihilation/let-httr-build-url
let httr build URL
2 parents 8556b10 + eeccce8 commit 014c1b8

File tree

9 files changed

+155
-61
lines changed

9 files changed

+155
-61
lines changed

R/categories.R

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,18 @@ categories_in_page <- function(language = NULL, project = NULL, domain = NULL,
5757
} else {
5858
show_hidden <- "!hidden"
5959
}
60-
url <- url_gen(language, project, domain,
61-
paste0("&action=query&prop=categories&clprop=", properties,
62-
"&clshow=", show_hidden, "&cllimit=", limit,
63-
"&titles=",pages))
60+
url <- url_gen(language, project, domain)
61+
query_param <- list(
62+
action = "query",
63+
prop = "categories",
64+
clprop = properties,
65+
clshow = show_hidden,
66+
cllimit= limit,
67+
titles = pages
68+
)
6469

6570
#Retrieve, check, return
66-
content <- query(url, "pagecats", clean_response, ...)
71+
content <- query(url, "pagecats", clean_response, query_param = query_param, ...)
6772
page_names <- names(unlist(content))
6873
missing_pages <- sum(grepl(x = page_names, pattern = "missing"))
6974
if(missing_pages){
@@ -138,10 +143,17 @@ pages_in_category <- function(language = NULL, project = NULL, domain = NULL, ca
138143
type <- paste(type, collapse = "|")
139144

140145
#Construct URL
141-
url <- url_gen(language, project, domain, "&action=query&list=categorymembers&cmtitle=",
142-
categories, "&cmprop=", properties, "&cmtype=",type, "&cmlimit=", limit)
146+
url <- url_gen(language, project, domain)
147+
query_param <- list(
148+
action = "query",
149+
list = "categorymembers",
150+
cmtitle = categories,
151+
cmprop = properties,
152+
cmtype = type,
153+
cmlimit = limit
154+
)
143155

144156
#Query and return
145-
content <- query(url, "catpages", clean_response, ...)
157+
content <- query(url, "catpages", clean_response, query_param = query_param, ...)
146158
return(content)
147159
}

R/content.R

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,17 @@ random_page <- function(language = NULL, project = NULL, domain = NULL,
5353
clean_response = FALSE, ...){
5454

5555

56-
url <- url_gen(language, project, domain, "&action=query&list=random&rnlimit=", limit)
56+
url <- url_gen(language, project, domain)
57+
query_param <- list(
58+
action = "query",
59+
list = "random",
60+
rnlimit = limit
61+
)
62+
5763
if(!is.null(namespaces)){
58-
url <- paste0(url, "&rnnamespace=", paste(namespaces, collapse = "|"))
64+
query_param$rnnamespace <- paste(namespaces, collapse = "|")
5965
}
60-
pages <- query(url, NULL, FALSE)$query$random
66+
pages <- query(url, NULL, FALSE, query_param = query_param)$query$random
6167

6268
return(lapply(pages, function(page, language, project, domain, page_name, as_wikitext,
6369
clean_response, ...){
@@ -122,17 +128,19 @@ page_content <- function(language = NULL, project = NULL, domain = NULL,
122128
properties <- "text|revid"
123129
}
124130
properties <- paste(properties, collapse = "|")
125-
url <- url_gen(language, project, domain, "&action=parse&prop=", properties)
131+
url <- url_gen(language, project, domain)
132+
query_param <- list(
133+
action = "parse",
134+
prop = properties
135+
)
126136
if(!is.null(page_id)){
127-
page_id <- handle_limits(page_id, 1)
128-
url <- paste0(url, "&pageid=", page_id)
137+
query_param$page_id <- handle_limits(page_id, 1)
129138
} else {
130-
page_name <- handle_limits(page_name, 1)
131-
url <- paste0(url, "&page=", page_name)
139+
query_param$page <- handle_limits(page_name, 1)
132140
}
133141

134142
#Run
135-
content <- query(url, "pcontent", clean_response, ...)
143+
content <- query(url, "pcontent", clean_response, query_param = query_param, ...)
136144

137145
#Return
138146
return(content)
@@ -194,12 +202,17 @@ revision_content <- function(language = NULL, project = NULL, domain = NULL,
194202
properties <- match.arg(arg = properties, several.ok = TRUE)
195203
properties <- paste(properties, collapse = "|")
196204
revisions <- handle_limits(revisions, 50)
197-
url <- url_gen(language, project, domain,
198-
"&rvcontentformat=text/x-wiki&action=query&prop=revisions&rvprop=",
199-
properties, "&revids=",revisions)
205+
url <- url_gen(language, project, domain)
206+
query_param <- list(
207+
rvcontentformat = "text/x-wiki",
208+
action = "query",
209+
prop = "revisions",
210+
rvprop = properties,
211+
revids = revisions
212+
)
200213

201214
#Run
202-
content <- query(url, "rcontent", clean_response, ...)
215+
content <- query(url, "rcontent", clean_response, query_param = query_param, ...)
203216

204217
#Check for invalid RevIDs
205218
invalid_revs(content)
@@ -277,13 +290,19 @@ revision_diff <- function(language = NULL, project = NULL, domain = NULL,
277290
properties <- match.arg(properties, several.ok = TRUE)
278291
properties <- paste(properties, collapse = "|")
279292
revisions <- handle_limits(revisions, 50)
280-
url <- url_gen(language, project, domain, "&action=query&prop=revisions&rvprop=",
281-
properties, "&rvdiffto=", direction, "&rvcontentformat=text/css&revids=",
282-
revisions)
293+
url <- url_gen(language, project, domain)
294+
query_param <- list(
295+
action = "query",
296+
prop = "revisions",
297+
rvprop = properties,
298+
rvdiffto = direction,
299+
rvcontentformat = "text/css",
300+
revids = revisions
301+
)
283302

284303
#Retrieve the content, check for invalid RevIDs and uncached diffs,
285304
#return.
286-
content <- query(url, "rdiff", clean_response, ...)
305+
content <- query(url, "rdiff", clean_response, query_param = query_param, ...)
287306
invalid_revs(content)
288307
if(sum(grepl(x = names(unlist(content)), pattern = "diff.notcached"))){
289308
warning("This request contained uncached diffs; these will not be returned", call. = FALSE)

R/metadata.R

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,19 @@ page_backlinks <- function(language = NULL, project = NULL, domain = NULL,
4646
page, limit = 50, direction = "ascending", namespaces = NULL,
4747
clean_response = FALSE, ...){
4848

49-
url <- url_gen(language, project, domain, "&action=query&list=backlinks&bltitle=", page,
50-
"&bldir=", direction, "&bllimit=", limit)
49+
url <- url_gen(language, project, domain)
50+
query_param <- list(
51+
action = "query",
52+
list = "backlinks",
53+
bltitle = page,
54+
bldir = direction,
55+
bllimit = limit
56+
)
57+
5158
if(!is.null(namespaces)){
52-
url <- paste0(url,"&blnamespace=",paste(namespaces, collapse = "|"))
59+
query_param$blnamespace <- paste(namespaces, collapse = "|")
5360
}
54-
content <- query(url, "blink", clean_response, ...)
61+
content <- query(url, "blink", clean_response, query_param = query_param, ...)
5562
return(content)
5663
}
5764

@@ -99,13 +106,19 @@ page_links <- function(language = NULL, project = NULL, domain = NULL,
99106
page, limit = 50, direction = "ascending", namespaces = NULL,
100107
clean_response = FALSE, ...){
101108

102-
url <- url_gen(language, project, domain, "&action=query&prop=links&titles=", page,
103-
"&pldir=", direction, "&pllimit=", limit)
109+
url <- url_gen(language, project, domain)
110+
query_param <- list(
111+
action = "query",
112+
prop = "links",
113+
titles = page,
114+
pldir = direction,
115+
pllimit = limit
116+
)
104117

105118
if(!is.null(namespaces)){
106-
url <- paste0(url,"&plnamespace=",paste(namespaces, collapse = "|"))
119+
query_param$plnamespace <- paste(namespaces, collapse = "|")
107120
}
108-
content <- query(url, "plink", clean_response, ...)
121+
content <- query(url, "plink", clean_response, query_param = query_param, ...)
109122
return(content)
110123
}
111124

@@ -148,11 +161,16 @@ page_external_links <- function(language = NULL, project = NULL, domain = NULL,
148161
page, protocol = NULL, clean_response = FALSE,
149162
...){
150163

151-
url <- url_gen(language, project, domain, "&action=query&prop=extlinks&titles=", page)
164+
url <- url_gen(language, project, domain)
165+
query_param <- list(
166+
action = "query",
167+
prop = "extlinks",
168+
titles = page
169+
)
152170
if(!is.null(protocol)){
153-
url <- paste0(url,"&elprotocol=", protocol)
171+
query_param$elprotocol <- protocol
154172
}
155-
content <- query(url, "elink", clean_response, ...)
173+
content <- query(url, "elink", clean_response, query_param = query_param, ...)
156174
return(content)
157175
}
158176

@@ -195,7 +213,13 @@ page_info <- function(language = NULL, project = NULL, domain = NULL,
195213

196214
properties <- match.arg(arg = properties, several.ok = TRUE)
197215
properties <- paste(properties, collapse = "|")
198-
url <- url_gen(language, project, domain, "&action=query&prop=info&inprop=", properties, "&titles=", page)
199-
content <- query(url, "pageinfo", clean_response, ...)
216+
url <- url_gen(language, project, domain)
217+
query_param <- list(
218+
action = "query",
219+
prop = "info",
220+
inprop = properties,
221+
titles = page
222+
)
223+
content <- query(url, "pageinfo", clean_response, query_param = query_param, ...)
200224
return(content)
201225
}

R/query.R

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,26 @@
1111
#'@param clean_response whether to clean the response, using the method assigned
1212
#'by out_class, or not.
1313
#'
14+
#'@param query_param query parameters
15+
#'
1416
#'@param ... further arguments to httr's GET.
1517
#'@export
16-
query <- function(url, out_class, clean_response = FALSE, ...){
18+
query <- function(url, out_class, clean_response = FALSE, query_param = list(), ...){
19+
# Common query parameters
20+
if(is.null(query_param$format)) {
21+
query_param$format <- "json"
22+
}
23+
24+
# Add http scheme if url has no scheme
25+
if(!any(startsWith(url, c("http://", "https://")))) {
26+
url <- paste0("http://", url)
27+
}
1728

18-
#Encode url, add "http://", query
19-
url <- paste0("http://",utils::URLencode(url))
2029
args <- list(...)
2130
if(length(args) > 0 && "config" %in% class(args[[1]]) && "useragent" %in% names(args[[1]])){
22-
response <- httr::GET(url, ...)
31+
response <- httr::GET(url, query = query_param, ...)
2332
} else {
24-
response <- httr::GET(url, httr::user_agent("WikipediR - https://github.com/Ironholds/WikipediR"), ...)
33+
response <- httr::GET(url, query = query_param, httr::user_agent("WikipediR - https://github.com/Ironholds/WikipediR"), ...)
2534
}
2635

2736
#Check the validity of the response
@@ -46,12 +55,12 @@ url_gen <- function(language, project, domain = NULL, ...){
4655
if(is.null(domain)){
4756
#Commons and Wikispecies have different URL formats, so those have to be handled in a hinky way.
4857
if(project %in% c("commons","species")){
49-
url <- paste0(project, ".wikimedia.org/w/api.php?format=json", ...)
58+
url <- sprintf("http://%s.wikimedia.org/w/api.php", project)
5059
} else {
51-
url <- paste0(language, "." ,project, ".org/w/api.php?format=json", ...)
60+
url <- sprintf("http://%s.%s.org/w/api.php", language, project)
5261
}
5362
} else {
54-
url <- paste0(domain,"/w/api.php?format=json", ...)
63+
url <- sprintf("http://%s/w/api.php", domain)
5564
}
5665

5766
#Return

R/recent_changes.R

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,17 +62,24 @@ recent_changes <- function(language = NULL, project = NULL, domain = NULL,
6262
type <- paste(type, collapse = "|")
6363
properties <- match.arg(arg = properties, several.ok = TRUE)
6464
properties <- paste(properties, collapse = "|")
65-
url <- url_gen(language, project, domain, "&action=query&list=recentchanges&rcdir=",
66-
dir, "&rcprop=", properties, "&rctype=", type, "&rclimit=", limit)
65+
url <- url_gen(language, project, domain)
66+
query_param <- list(
67+
action = "query",
68+
list = "recentchanges",
69+
rcdir = dir,
70+
rcprop = properties,
71+
rctype = type,
72+
rclimit= limit
73+
)
6774
if(!is.null(tag)){
68-
url <- paste0(url, "&rctag=", paste(tag, collapse = "|"))
75+
query_param$rctag <- paste(tag, collapse = "|")
6976
}
7077
if(top){
71-
url <- paste0(url, "&rctoponly")
78+
query_param$rctoponly <- ""
7279
}
7380

7481
#Query
75-
content <- query(url, "rchanges", clean_response, ...)
82+
content <- query(url, "rchanges", clean_response, query_param = query_param, ...)
7683

7784
#Return
7885
return(content)

R/user_info.R

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -73,17 +73,22 @@ user_contributions <- function(language = NULL, project = NULL, domain = NULL,
7373
properties <- match.arg(properties, several.ok = TRUE)
7474
properties <- paste(properties, collapse = "|")
7575
username <- handle_limits(username, 1)
76-
url <- url_gen(language, project, domain,
77-
paste0("&action=query&list=usercontribs&uclimit=", limit,
78-
"&ucuser=", username, "&ucprop=", properties))
76+
url <- url_gen(language, project, domain)
77+
query_param <- list(
78+
action = "query",
79+
list = "usercontribs",
80+
uclimit = limit,
81+
ucuser = username,
82+
ucprop = properties
83+
)
7984

8085
#If only article contributions are desired, note that.
8186
if(mainspace){
82-
url <- paste0(url, "&ucnamespace=0", sep = "")
87+
query_param$ucnamespace <- 0
8388
}
8489

8590
#Get, check and return
86-
contribs_content <- query(url, "ucontribs", clean_response, ...)
91+
contribs_content <- query(url, "ucontribs", clean_response, query_param = query_param, ...)
8792
missing_users(contribs_content)
8893
return(contribs_content)
8994
}
@@ -162,11 +167,17 @@ user_information <- function(language = NULL, project = NULL, domain = NULL,
162167
properties <- match.arg(properties, several.ok = TRUE)
163168
properties <- paste(properties, collapse = "|")
164169
user_names <- handle_limits(user_names, 50)
165-
url <- url_gen(language, project, domain,
166-
paste0("&action=query&list=users&usprop=",properties,"&ususers=",user_names))
167-
170+
url <- url_gen(language, project, domain)
171+
query_param = list(
172+
action = "query",
173+
list = "users",
174+
usprop = properties,
175+
ususers = user_names
176+
177+
)
178+
168179
#Retrieve the content, check it, return.
169-
user_content <- query(url, "uinfo", clean_response, ...)
180+
user_content <- query(url, "uinfo", clean_response, query_param = query_param, ...)
170181
missing_users(user_content)
171182
return(user_content)
172183

man/query.Rd

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test_category_retrieval.R

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,9 @@ test_that("Category members can be retrieved through categories_in_page", {
1414

1515
test_that("Category members can be retrieved through categories_in_page", {
1616
expect_true({pages_in_category("en","wikipedia", categories = "1920s births", type = "subcat");TRUE})
17+
})
18+
19+
test_that("page categories can be retrieved through categories_in_page with non-ASCII query", {
20+
# \u30dd\u30b1\u30e2\u30f3 is "Pokemon" in Japanese letters
21+
expect_true({categories_in_page("en","wikipedia", page = "\u30dd\u30b1\u30e2\u30f3");TRUE})
1722
})

tests/testthat/test_content_retrieval.R

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,9 @@ test_that("HTML content can be retrieved through revision_content", {
1616

1717
test_that("Diffs can be retrieved through revision_content", {
1818
expect_true({revision_diff("en","wikipedia", revisions = "129122231", direction = "next");TRUE})
19+
})
20+
21+
test_that("Wikitext content can be retrieved through page_content with non-ASCII query", {
22+
# \u30dd\u30b1\u30e2\u30f3 is "Pokemon" in Japanese letters
23+
expect_true({page_content("en","wikipedia", page_name = "\u30dd\u30b1\u30e2\u30f3", as_wikitext=TRUE);TRUE})
1924
})

0 commit comments

Comments
 (0)