|
| 1 | +#!/usr/bin/env python |
| 2 | + |
| 3 | +# Copyright (C) 2012 p12 <tir5c3@yahoo.co.uk> |
| 4 | +# |
| 5 | +# This file is part of cppreference-doc |
| 6 | +# |
| 7 | +# This program is free software: you can redistribute it and/or modify |
| 8 | +# it under the terms of the GNU General Public License as published by |
| 9 | +# the Free Software Foundation, either version 3 of the License, or |
| 10 | +# (at your option) any later version. |
| 11 | +# |
| 12 | +# This program is distributed in the hope that it will be useful, |
| 13 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | +# GNU General Public License for more details. |
| 16 | +# |
| 17 | +# You should have received a copy of the GNU General Public License |
| 18 | +# along with this program. If not, see http://www.gnu.org/licenses/. |
| 19 | + |
| 20 | +# This script examines all html files in the output directory and writes |
| 21 | +# a page title -> file location mapping to an xml file (link-map.xml). |
| 22 | + |
| 23 | +import fnmatch |
| 24 | +import re |
| 25 | +import os |
| 26 | + |
# Find all html files in the output directory and write an xml file that maps
# each page title (the wgPageName value embedded in the page) to the location
# of the file relative to the output directory.

# Imported here because this part of the script is the only user of it.
from xml.sax.saxutils import escape

# The <script> element that calls mw.config.set and mentions wgPageName.
# Group 1 is everything between "mw.config.set" and "</script>". Note that the
# "(" after "set" opens the capture group; it does not match a literal paren.
CONFIG_SCRIPT_RE = re.compile(
    r'<script>[^<]*mw\.config\.set([^<]*wgPageName[^<]*)</script>')

# Applied after all whitespace has been removed from the script text, so no
# optional whitespace around the colon has to be accounted for.
PAGE_NAME_RE = re.compile(r'"wgPageName":"([^"]*)"')

WHITESPACE_RE = re.compile(r'\s+')


def find_html_files(top):
    """Return the paths of all ``*.html`` files found below directory `top`.

    The paths are returned in the order os.walk() reports them. A missing
    directory yields an empty list because os.walk() ignores the error.
    """
    html_files = []
    for root, dirnames, filenames in os.walk(top):
        for filename in fnmatch.filter(filenames, '*.html'):
            html_files.append(os.path.join(root, filename))
    return html_files


def extract_page_name(text):
    """Return the wgPageName value embedded in the html `text`.

    Returns None when the page has no matching mw.config.set script or when
    that script has no quoted "wgPageName" entry. All whitespace inside the
    script is dropped before the value is read, so any whitespace within the
    value itself is removed as well.
    """
    m = CONFIG_SCRIPT_RE.search(text)
    if not m:
        return None
    config = WHITESPACE_RE.sub('', m.group(1))
    m = PAGE_NAME_RE.search(config)
    if not m:
        return None
    return m.group(1)


def xml_attr(value):
    """Escape `value` for use inside a double-quoted xml attribute."""
    # escape() handles &, < and > itself; the double quote has to be added
    # because the attribute values are delimited by double quotes.
    return escape(value, {'"': '&quot;'})


def format_entry(title, target):
    """Return the ``<file .../>`` line mapping page `title` to path `target`."""
    return (' <file from="' + xml_attr(title) +
            '" to="' + xml_attr(target) + '" />\n')


def main(output_dir='output', map_path='link-map.xml'):
    """Write the title -> location mapping of `output_dir` to `map_path`.

    Html files from which no page name can be extracted are skipped. The
    locations written are relative to `output_dir`.
    """
    html_files = find_html_files(output_dir)
    base = os.path.abspath(output_dir)

    # The with-blocks make sure the files are closed even if reading or
    # writing fails half way through.
    with open(map_path, 'w') as out:
        out.write('<?xml version="1.0" encoding="UTF-8"?><files>\n')

        for fn in html_files:
            with open(fn, 'r') as f:
                text = f.read()

            title = extract_page_name(text)
            if title is None:
                continue

            target = os.path.relpath(os.path.abspath(fn), base)
            out.write(format_entry(title, target))

        out.write('</files>')


if __name__ == '__main__':
    main()
0 commit comments