forked from github/docs
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget-data.js
More file actions
328 lines (299 loc) · 11.7 KB
/
get-data.js
File metadata and controls
328 lines (299 loc) · 11.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
import fs from 'fs'
import path from 'path'
import yaml from 'js-yaml'
import matter from 'gray-matter'
import { merge, get } from 'lodash-es'
import languages from './languages.js'
// Opt-in tracing of disk reads. Run `export DEBUG_JIT_DATA_READS=true` in
// your terminal and, from the next start, every file read is mentioned.
// The environment value is parsed as JSON and coerced to a boolean; when
// the variable is unset or empty the flag is off.
const DEBUG_JIT_DATA_READS = !!JSON.parse(process.env.DEBUG_JIT_DATA_READS || 'false')
// Yaml files for which we always, immediately, fall back to English.
// Having this list is safer than trying to wrangle the translations into
// NOT translating them.
//
// The entries are compared against relative paths that the rest of this
// file assembles with `path.sep` / `path.join()`, so they are built with
// `path.join()` as well. Hard-coded forward slashes would never match on
// platforms where the separator is a backslash.
const ALWAYS_ENGLISH_YAML_FILES = new Set([
  path.join('data', 'variables', 'product.yml'),
  path.join('data', 'variables', 'release_candidate.yml'),
])
// Returns everything found inside a data directory, for one language.
// `dottedPath` names the directory below `data/` with dots as separators
// and `langCode` must be one of the keys of `languages`, otherwise this
// throws.
export const getDeepDataByLanguage = memoize((dottedPath, langCode) => {
  const isKnownLanguage = langCode in languages
  if (!isKnownLanguage) {
    throw new Error(`langCode '${langCode}' not a recognized language code`)
  }
  return getDeepDataByDir(dottedPath, languages[langCode].dir)
})
// Doesn't need to be memoized because it's used by getDeepDataByLanguage()
// which is already memoized.
//
// Reads the directory `data/<dottedPath with dots as separators>` below
// `dir` and returns its content as a plain object.
function getDeepDataByDir(dottedPath, dir) {
  const relPath = ['data', ...dottedPath.split('.')].join(path.sep)
  return readDeepData(dir, relPath)
}

// Recursively reads the directory `relPath` (relative to `dir`):
//   - `README.md` entries are skipped,
//   - sub-directories become nested objects keyed by directory name,
//   - `*.yml` files are parsed and keyed by file name without `.yml`,
//   - `*.md` files are read as Markdown and keyed by their full file name,
//   - any other entry throws.
// The recursion hands the on-disk path down instead of re-building a dotted
// path and splitting it again, so a sub-directory whose name itself
// contains a dot is still resolved to the right location.
function readDeepData(dir, relPath) {
  const things = {}
  for (const dirent of getDirents(dir, relPath)) {
    const { name } = dirent
    if (name === 'README.md') continue
    const entryPath = path.join(relPath, name)
    if (dirent.isDirectory()) {
      things[name] = readDeepData(dir, entryPath)
    } else if (name.endsWith('.yml')) {
      // The key is the bare file name, e.g. '3-5' or '0-rc2'
      things[name.replace(/\.yml$/, '')] = getYamlContent(dir, entryPath)
    } else if (name.endsWith('.md')) {
      things[name] = getMarkdownContent(dir, entryPath)
    } else {
      throw new Error(`don't know how to read '${name}'`)
    }
  }
  return things
}
// Lists the entries of a directory as `fs.Dirent` objects.
// When `root` is truthy the directory is `root/relPath`, otherwise
// `relPath` is used as given.
function getDirents(root, relPath) {
  let directory = relPath
  if (root) {
    directory = path.join(root, relPath)
  }
  return fs.readdirSync(directory, { withFileTypes: true })
}
// Returns the UI strings for `langCode`. For English that is the English
// data as-is; for any other language the translation is layered on top of
// the English data so that keys missing from the translation still resolve.
export const getUIDataMerged = memoize((langCode) => {
  const english = getUIData('en')
  if (langCode === 'en') {
    return english
  }
  // Got to combine. Start with the English and put the translation on top.
  // E.g.
  //   english  = {food: "Food", drink: "Drink"}
  //   swedish  = {food: "Mat"}
  //   =>
  //   combined = {food: "Mat", drink: "Drink"}
  // Sources are applied left to right into a fresh object.
  return merge({}, english, getUIData(langCode))
})
// Reads and parses `data/ui.yml` from the directory of the given language.
// Not memoized itself because its caller is memoized.
const getUIData = (langCode) => {
  const { dir: languageRoot } = languages[langCode]
  const uiRelPath = path.join('data', 'ui.yml')
  return getYamlContent(languageRoot, uiRelPath)
}
// Looks up one dotted data reference for a language, falling back to the
// English data when the translation can't supply a value.
//   - throws if `langCode` is not a key of `languages`
//   - returns `undefined` when the lookup fails with ENOENT
//   - rethrows Yaml parse errors for English, and any other error
export const getDataByLanguage = memoize((dottedPath, langCode) => {
  if (!(langCode in languages)) {
    throw new Error(`langCode '${langCode}' not a recognized language code`)
  }
  const { dir } = languages[langCode]
  const isEnglish = langCode === 'en'
  const lookUpInEnglish = () => getDataByDir(dottedPath, languages.en.dir)

  try {
    const found = getDataByDir(dottedPath, dir, languages.en.dir)
    // A key may have been added to the English data/ui.yml only, while a
    // translated `data/ui.yml` nevertheless exists. getDataByDir() uses
    // `get(dataObject, 'dott.ed.path')`, which yields `undefined` for a
    // missing key, so there is no `ENOENT` to react to. In that case we
    // simply start over with the English data.
    if (found !== undefined || isEnglish) {
      return found
    }
    return lookUpInEnglish()
  } catch (error) {
    // A yaml.load() generated error carries a `mark`.
    // Remember, the file that was read might have been a .yml or a .md
    // file. A .md file with corrupt front-matter would have caused a
    // YAMLException too.
    const isYamlError = error instanceof Error && error.mark && error.message
    if (!isYamlError) {
      if (error.code === 'ENOENT') return undefined
      throw error
    }
    // Always throw English Yaml reading errors. Staff writers
    // need to know early and explicitly that they are corrupt.
    if (isEnglish) {
      throw error
    }
    if (DEBUG_JIT_DATA_READS) {
      console.warn(`Unable to parse Yaml in (${langCode}) '${dottedPath}': ${error.message}`)
    }
    // Give it one more chance, but use English this time
    return lookUpInEnglish()
  }
})
// Resolves one dotted data reference against the files below `dir`;
// `englishRoot` is handed on to the file readers.
// The first segment (after an optional leading `early-access`) decides
// which file is read and what is returned:
//   variables         -> `<dirs>/<basename>.yml`; the entry under the last
//                        segment run through matter(), or undefined
//   reusables         -> `<dirs>/<name>.md`; the Markdown content
//   ui                -> `ui.yml`; get(data, <remaining dotted path>)
//   product-examples,
//   glossaries,
//   release-notes     -> `<dirs>/<basename>.yml`; the whole parsed file
//   learning-tracks   -> `<dirs>/<basename>.yml`; the entry under the last
//                        segment
// Any other first segment throws.
function getDataByDir(dottedPath, dir, englishRoot) {
  // We want to turn `foo.version-3.4.deeper.key` into
  // `['foo', 'version-3.4', 'deeper', 'key']`. getSmartSplit() is needed as
  // long as a directory or file inside data/ might contain a dot in its
  // name. The exception is data/release-notes/**/*.yml, whose file names
  // can be plain numbers like 3-7/0.yml, which can cause problems inside
  // getSmartSplit(), so those are split on every dot.
  const segments = dottedPath.startsWith('release-notes')
    ? dottedPath.split('.')
    : getSmartSplit(dottedPath)

  const baseParts = ['data']
  // Early-access data is referred to as...
  //
  //    {% data early-access.reusables.foo.bar %}
  //
  // When the early-access data is "merged" in, the whole directory is put
  // within the root `data/`, so on disk it exists as
  //
  //    data/early-access/reusables/foo/bar.md
  //
  if (segments[0] === 'early-access') {
    baseParts.push(segments.shift())
  }

  // Relative path made of the base directory, whatever segments are left
  // at the time of the call, and the given file name.
  const relPathTo = (fileName) => [...baseParts, ...segments, fileName].join(path.sep)

  switch (segments[0]) {
    case 'variables': {
      const key = segments.pop()
      const basename = segments.pop()
      const relPath = relPathTo(`${basename}.yml`)
      const allData = getYamlContent(dir, relPath, englishRoot)
      if (!allData) {
        console.warn(`Unable to find variables Yaml file ${relPath}`)
        return undefined
      }
      const value = allData[key]
      return value ? matter(value).content : undefined
    }

    case 'reusables': {
      const nakedname = segments.pop()
      const markdown = getMarkdownContent(dir, relPathTo(`${nakedname}.md`), englishRoot)
      return matter(markdown).content
    }

    // E.g. {% data ui.pages.foo.bar %}
    case 'ui': {
      const basename = segments.shift() // i.e. 'ui'
      const relPath = [...baseParts, `${basename}.yml`].join(path.sep)
      const allData = getYamlContent(dir, relPath, englishRoot)
      // What remains in `segments` is the key path inside the file
      return get(allData, segments.join('.'))
    }

    case 'product-examples':
    case 'glossaries':
    case 'release-notes': {
      const basename = segments.pop()
      return getYamlContent(dir, relPathTo(`${basename}.yml`), englishRoot)
    }

    case 'learning-tracks': {
      const key = segments.pop()
      const basename = segments.pop()
      const allData = getYamlContent(dir, relPathTo(`${basename}.yml`), englishRoot)
      return allData[key]
    }

    default:
      throw new Error(`Can't find the key '${dottedPath}' in the scope.`)
  }
}
// Splits a dotted path on `.`, except that a piece ending in a digit is
// glued back together with a following piece starting with a digit, e.g.
// `foo.version-3.4.deeper.key` -> ['foo', 'version-3.4', 'deeper', 'key'].
// A glued pair is consumed as a whole, so at most two pieces are joined
// at a time.
function getSmartSplit(dottedPath) {
  const pieces = dottedPath.split('.')
  const lastIndex = pieces.length - 1
  const merged = []
  let index = 0
  while (index <= lastIndex) {
    const current = pieces[index]
    const following = pieces[index + 1]
    const looksLikeVersion = index < lastIndex && /\d$/.test(current) && /^\d/.test(following)
    if (looksLikeVersion) {
      merged.push(`${current}.${following}`)
      index += 2 // the following piece has been used up as well
    } else {
      merged.push(current)
      index += 1
    }
  }
  return merged
}
// Reads a Yaml file and returns the parsed result.
//
// This is memoized even though the parent caller (`getDataByLanguage`) is
// memoized too, because the same file can be needed for two different
// keys. E.g.
//
//    getDataByLanguage('variables.product.prodname_ghe_server', 'en')
//    getDataByLanguage('variables.product.company_short', 'en')
//
// ...both depend on `data/variables/product.yml`. With the disk read
// cached, the outcome becomes:
//
//   1. getDataByLanguage('variables.product.prodname_ghe_server', 'en')
//      -> cache MISS
//   1.1. read and parse data/variables/product.yml
//      -> cache MISS
//   2. getDataByLanguage('variables.product.company_short', 'en')
//      -> cache MISS
//   2.1. read and parse data/variables/product.yml
//      -> cache HIT (Yay!)
//
const getYamlContent = memoize((root, relPath, englishRoot) => {
  // Certain Yaml files we always want in English no matter what language
  // was specified. For example, `data/variables/product.yml` should never
  // be translated, so for those an empty root is used, which forces the
  // read to come from English.
  const readRoot = ALWAYS_ENGLISH_YAML_FILES.has(relPath) ? '' : root
  const rawYaml = getFileContent(readRoot, relPath, englishRoot)
  return yaml.load(rawYaml, { filename: relPath })
})
// Reads a Markdown file and returns its content as produced by matter(),
// with trailing whitespace removed.
// Memoized for the same reason as getYamlContent(): one file can be needed
// by several lookups.
const getMarkdownContent = memoize((root, relPath, englishRoot) => {
  const rawMarkdown = getFileContent(root, relPath, englishRoot)
  const { content } = matter(rawMarkdown)
  return content.trimEnd()
})
// Reads `relPath` as UTF-8 text, from below `root` when `root` is truthy
// and as given otherwise.
// If the file does not exist (ENOENT) and `root` is not `englishRoot`, the
// read is attempted once more from `englishRoot`; that covers data entries
// that don't yet exist in a translation. Every other error, and a miss on
// the `englishRoot` attempt, is thrown to the caller.
const getFileContent = (root, relPath, englishRoot) => {
  let currentRoot = root
  for (;;) {
    const filePath = currentRoot ? path.join(currentRoot, relPath) : relPath
    if (DEBUG_JIT_DATA_READS) console.log('READ', filePath)
    try {
      return fs.readFileSync(filePath, 'utf-8')
    } catch (err) {
      const isMissing = err.code === 'ENOENT'
      if (!isMissing || currentRoot === englishRoot) {
        throw err
      }
      // We can try again but this time using the English files
      currentRoot = englishRoot
    }
  }
}
// Wraps `func` so that calls with the same arguments reuse the result of
// the first call.
//
//   - The cache key is the arguments joined with ':', so arguments are
//     expected to be values (such as strings) that stringify distinctly.
//   - When NODE_ENV is 'development' nothing is cached and `func` is called
//     every time.
//   - Cached arrays and objects are returned as shallow copies; every other
//     value, including `null` and `undefined`, is returned as it was stored.
//   - If `func` throws, nothing is stored and the error reaches the caller.
function memoize(func) {
  const cache = new Map()
  return (...args) => {
    if (process.env.NODE_ENV === 'development') {
      // It is very possible that certain files, when caching is disabled,
      // are read multiple times in short succession. E.g. `product.yml`.
      // So how expensive is it to read these files excessively?
      // To answer that, we benchmarked it by sampling 10 files from the
      // most common files that are used from `data/`. In fact, we ran 100
      // runs of 10 *different* files. About 80% of them were `.yml` files.
      // As a median, it takes **0.5ms to read 10 files from disk**
      // all in a sync manner.
      // Since most files coming through here are `.yml` files (e.g.
      // product.yml and ui.yml) if you also do the `yaml.load()` of the
      // read content, that number becomes **2.1ms to read and parse 10 files**.
      // So in conclusion, not a lot of time.
      return func(...args)
    }

    const key = args.join(':')
    if (!cache.has(key)) {
      cache.set(key, func(...args))
    }
    const value = cache.get(key)

    // If what was stored in the cache is a mutable, this time, return
    // a shallow copy.
    // Otherwise, what *might* happen is this:
    //
    //   > const getNames = memoize(() => ["peter", "tucker"])
    //   > var names = getNames()
    //   > names.push("ashley")
    //   > var names2 = getNames()
    //   > names2.push("charlotte")
    //   > console.log(names2)
    //
    //   ["peter", "tucker", "ashley", "charlotte"]
    //
    // Note that these are shallow copies only.
    if (Array.isArray(value)) return [...value]
    // `typeof null` is 'object' too, and spreading null gives `{}`. Without
    // the explicit check a cached `null` would come back as an empty object
    // here while development mode (no caching) returns `null`.
    if (value !== null && typeof value === 'object') return { ...value }
    return value
  }
}