forked from tableau/document-api-python
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdatasource.py
More file actions
321 lines (241 loc) · 10.8 KB
/
datasource.py
File metadata and controls
321 lines (241 loc) · 10.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
import collections
import itertools
import xml.etree.ElementTree as ET
import xml.sax.saxutils as sax
from uuid import uuid4
from tableaudocumentapi import Connection, xfile, ConnectionRelation
from tableaudocumentapi import Field
from tableaudocumentapi.multilookup_dict import MultiLookupDict
from tableaudocumentapi.xfile import xml_open
from tableaudocumentapi.column import Column
from tableaudocumentapi.columnInstance import ColumnInstance
from tableaudocumentapi.datasource_group import DatasourceGroup
from tableaudocumentapi.extract import DatasourceExtract
from tableaudocumentapi.style_encoding import StyleEncoding
from tableaudocumentapi.datasource_dependency import DatasourceDependency
########
# This is needed in order to determine if something is a string or not. It is necessary because
# of differences between python2 (basestring) and python3 (str). If python2 support is ever
# dropped, remove this and change the basestring references below to str
try:
    basestring
except NameError:  # pragma: no cover
    basestring = str
########

# (field id, Field object) pair returned by the _column_object_from_* helpers,
# shaped so results can be fed directly into a dict constructor.
_ColumnObjectReturnTuple = collections.namedtuple('_ColumnObjectReturnTupleType', ['id', 'object'])
def _get_metadata_xml_for_field(root_xml, field_name):
if "'" in field_name:
field_name = sax.escape(field_name, {"'": "'"})
xpath = u".//metadata-record[@class='column'][local-name='{}']".format(field_name)
return root_xml.find(xpath)
def _is_used_by_worksheet(names, field):
return any(y for y in names if y in field.worksheets)
class FieldDictionary(MultiLookupDict):
    """Dictionary of fields that also supports lookup by worksheet usage."""

    def used_by_sheet(self, name):
        """Return the fields used by the given worksheet name, or by any name in a list."""
        # A single sheet name: keep only the fields that list it directly.
        if isinstance(name, basestring):
            return [field for field in self.values() if name in field.worksheets]
        # A collection of names: keep fields used by any of those sheets.
        return [field for field in self.values() if _is_used_by_worksheet(name, field)]
def _column_object_from_column_xml(root_xml, column_xml):
    """Build a Field from a column element, enriched with its metadata record if one exists."""
    field = Field.from_column_xml(column_xml)
    metadata = _get_metadata_xml_for_field(root_xml, field.id)
    if metadata is not None:
        field.apply_metadata(metadata)
    return _ColumnObjectReturnTuple(field.id, field)
def _column_object_from_metadata_xml(metadata_xml):
    """Build a Field from a metadata-record element, paired with its id."""
    field = Field.from_metadata_xml(metadata_xml)
    return _ColumnObjectReturnTuple(field.id, field)
def base36encode(number):
    """Converts an integer into a base36 string."""
    digits = "0123456789abcdefghijklmnopqrstuvwxyz"
    if number == 0:
        return "0"
    prefix = ""
    if number < 0:
        prefix = "-"
        number = -number
    # Collect least-significant digit first, then reverse at the end.
    chunks = []
    while number:
        number, rem = divmod(number, 36)
        chunks.append(digits[rem])
    return prefix + "".join(reversed(chunks))
def _make_unique_name(dbclass):
    """Return a collision-resistant connection name of the form '<dbclass>.<random>'."""
    # A UUID4 rendered in base36 keeps the suffix short but effectively unique.
    return '{}.{}'.format(dbclass, base36encode(uuid4().int))
class ConnectionParser(object):
    """Parser for detecting and extracting connections from differing Tableau file formats."""

    def __init__(self, datasource_xml, version):
        self._dsxml = datasource_xml
        self._dsversion = version

    def _extract_federated_connections(self):
        """Return Connection objects for each federated connection element."""
        federated = self._dsxml.findall("./connection[@class='federated']")
        # 'sqlproxy' connections (Tableau Server Connections) are not embedded
        # into named-connection elements and would need manual extraction.
        # They are deliberately ignored: we do not want to modify sqlproxy
        # datasources in our current issue.
        # connections.extend(map(Connection, self._dsxml.findall("./connection[@class='sqlproxy']")))
        return [Connection(conn_xml) for conn_xml in federated]

    def _extract_legacy_connection(self):
        """Return Connection objects for pre-10 style connection elements."""
        return [Connection(conn_xml) for conn_xml in self._dsxml.findall('connection')]

    def get_connections(self):
        """Find and return all connections based on file format version."""
        if float(self._dsversion) < 10:
            return self._extract_legacy_connection()
        return self._extract_federated_connections()
class RelationParser(object):
    """Parser for detecting and extracting relations in a Data Source."""

    def __init__(self, dsxml):
        self._dsxml = dsxml

    def get_relations(self):
        """Finds and return all relation elements for federated connections within the data source."""
        relation_elements = self._dsxml.findall('./connection//relation')
        return [ConnectionRelation(rel_xml) for rel_xml in relation_elements]
class Datasource(object):
    """A class representing Tableau Data Sources, embedded in workbook files or
    in TDS files.
    """

    def __init__(self, dsxml, filename=None):
        """
        Constructor. Default is to create datasource from xml.

        Args:
            dsxml: Root <datasource> XML element.
            filename: Optional path the datasource was loaded from; used by save().
        """
        self._filename = filename
        self._datasourceXML = dsxml
        self._datasourceTree = ET.ElementTree(self._datasourceXML)
        # TDS files don't have a name attribute; fall back to formatted-name.
        self._name = self._datasourceXML.get('name') or self._datasourceXML.get(
            'formatted-name')
        self._version = self._datasourceXML.get('version')
        self._caption = self._datasourceXML.get('caption', '')
        self._connection_parser = ConnectionParser(
            self._datasourceXML, version=self._version)
        self._connections = self._connection_parser.get_connections()
        self._relation_parser = RelationParser(self._datasourceXML)
        self._connection_relations = self._relation_parser.get_relations()
        # Lazily built by the `fields` property; None means "not built yet".
        self._fields = None
        self._columns = list(map(Column, self._datasourceXML.findall('column')))
        self._column_instances = [ColumnInstance(clmInst)
                                  for clmInst in self._datasourceXML.findall('column-instance')]
        self._groups = [DatasourceGroup(grpInst)
                        for grpInst in self._datasourceXML.findall('group')]
        self._extract = DatasourceExtract(self._datasourceXML.find('extract'))
        self._style_encoding = StyleEncoding(self._datasourceXML.find('style/*/encoding'))
        self._ds_dep_xml = self._datasourceXML.findall('./datasource-dependencies')
        self._datasource_dependencies = (list(map(DatasourceDependency, self._ds_dep_xml))
                                         if self._ds_dep_xml else [])

    @classmethod
    def from_file(cls, filename):
        """Initialize datasource from file (.tds or .tdsx)."""
        dsxml = xml_open(filename, 'datasource').getroot()
        return cls(dsxml, filename)

    @classmethod
    def from_connections(cls, caption, connections):
        """Create a new Data Source given a list of Connections."""
        root = ET.Element('datasource', caption=caption, version='10.0', inline='true')
        outer_connection = ET.SubElement(root, 'connection')
        outer_connection.set('class', 'federated')
        named_conns = ET.SubElement(outer_connection, 'named-connections')
        for conn in connections:
            nc = ET.SubElement(named_conns,
                               'named-connection',
                               name=_make_unique_name(conn.dbclass),
                               caption=conn.server)
            nc.append(conn._connectionXML)
        return cls(root)

    def change_field_name_in_datasource(self, original_column_name, new_column_name, datasource_name):
        """
        Not yet implemented.

        tags where the change has to happen:
        metadata-record, is per connection but parent-name is datasource name + metadata-record in extract/connection/metadata-records
        column, per datasource - list columns from ds which name is as passed, some columns are calculations
        column-instance, per datasource - same as for columns
        folder-item, per ds but //folder-item
        bucket tag in style//bucket containing field name but also bound to datasource_name
        """
        pass

    def save(self):
        """
        Call finalization code and save file.

        Args:
            None.

        Returns:
            Nothing.
        """
        # save the file
        xfile._save_file(self._filename, self._datasourceTree)

    def save_as(self, new_filename):
        """
        Save our file with the name provided.

        Args:
            new_filename: New name for the datasource file. String.

        Returns:
            Nothing.
        """
        xfile._save_file(self._filename, self._datasourceTree, new_filename)

    @property
    def name(self):
        return self._name

    @property
    def version(self):
        return self._version

    @property
    def caption(self):
        return self._caption

    @caption.setter
    def caption(self, value):
        self._datasourceXML.set('caption', value)
        self._caption = value

    @caption.deleter
    def caption(self):
        del self._datasourceXML.attrib['caption']
        self._caption = ''

    @property
    def connections(self):
        return self._connections

    @property
    def style_encoding(self):
        # NOTE(review): this property was defined twice with identical bodies;
        # the duplicate definition has been removed.
        return self._style_encoding

    @property
    def connection_relations(self):
        return self._connection_relations

    def clear_repository_location(self):
        """Remove the <repository-location> element, if present."""
        tag = self._datasourceXML.find('./repository-location')
        if tag is not None:
            self._datasourceXML.remove(tag)

    @property
    def fields(self):
        # Build lazily on first access only. Compare against None explicitly:
        # a legitimately empty field dictionary must not trigger a rebuild.
        if self._fields is None:
            self._fields = self._get_all_fields()
        return self._fields

    @property
    def columns(self):
        return self._columns

    @property
    def column_instances(self):
        return self._column_instances

    @property
    def groups(self):
        return self._groups

    @property
    def extract(self):
        return self._extract

    @property
    def datasource_dependencies(self):
        return self._datasource_dependencies

    def _get_all_fields(self):
        # Some columns are represented by `column` tags and others as `metadata-record` tags
        # Find them all and chain them into one dictionary
        column_field_objects = self._get_column_objects()
        existing_column_fields = [x.id for x in column_field_objects]
        metadata_only_field_objects = (x for x in self._get_metadata_objects()
                                       if x.id not in existing_column_fields)
        field_objects = itertools.chain(column_field_objects, metadata_only_field_objects)
        return FieldDictionary({k: v for k, v in field_objects})

    def _get_metadata_objects(self):
        return (_column_object_from_metadata_xml(x)
                for x in self._datasourceTree.findall(".//metadata-record[@class='column']"))

    def _get_column_objects(self):
        return [_column_object_from_column_xml(self._datasourceTree, xml)
                for xml in self._datasourceTree.findall('.//column')]