2424import sys
2525import stat
2626import locale
27+ import io
2728
2829# Local imports
2930from winpython .py3compat import winreg
3031
3132def get_python_executable (path = None ):
3233 """return the python executable"""
3334 my_path = sys .executable if path == None else path # default = current one
34- my_path = path if osp .isdir (path ) else osp .dirname (path )
35+ my_path = my_path if osp .isdir (my_path ) else osp .dirname (my_path )
3536 exec_py = os .path .join (path , 'python.exe' )
3637 exec_pypy = os .path .join (path , 'pypy3.exe' ) # PyPy !
3738 python_executable = exec_pypy if osp .isfile (exec_pypy ) else exec_py
@@ -40,7 +41,7 @@ def get_python_executable(path = None):
4041def get_site_packages_path (path = None ):
4142 """return the python site-packages"""
4243 my_path = sys .executable if path == None else path # default = current one
43- my_path = path if osp .isdir (path ) else osp .dirname (path )
44+ my_path = my_path if osp .isdir (my_path ) else osp .dirname (my_path )
4445 site_py = os .path .join (path , 'Lib' , 'site-packages' )
4546 site_pypy = os .path .join (path , 'site-packages' ) # PyPy !!
4647 site_packages_path = site_pypy if osp .isfile (site_pypy ) else site_py
@@ -501,6 +502,25 @@ def patch_shebang_line_py(
501502 print (line , end = '' )
502503
503504
505+ # =============================================================================
506+ # Guess encoding (shall rather be utf-8 per default)
507+ # =============================================================================
def guess_encoding(csv_file):
    """Guess the text encoding of the given file.

    Returns a list of candidate encoding names, most likely first:

    * ``["utf-8-sig"]`` when the file starts with the UTF-8 byte order mark
      (UTF-8 normally carries no BOM, but some editors write one);
    * ``["utf-8"]`` when the whole file decodes cleanly as UTF-8;
    * ``[<locale encoding>, "utf-8"]`` otherwise.

    UTF-16 is deliberately not detected: Python's behavior when writing it
    back is not great, so it is dropped.

    Errors raised while opening or reading the file (``OSError``) propagate
    to the caller.
    """
    utf8_bom = b"\xef\xbb\xbf"
    with io.open(csv_file, "rb") as f:
        if f.read(len(utf8_bom)) == utf8_bom:
            return ["utf-8-sig"]

    # On Windows, assuming utf-8 is not safe, so we have to try decoding.
    # The whole file is validated (in bounded chunks) because callers decode
    # the entire content with the encoding returned here.
    try:
        with io.open(csv_file, encoding="utf-8") as f:
            while f.read(65536):
                pass
    except UnicodeError:
        # locale.getdefaultlocale() is deprecated; prefer locale.getencoding()
        # (Python 3.11+) and fall back to getpreferredencoding() on older
        # interpreters.
        get_encoding = getattr(locale, "getencoding", None)
        if get_encoding is None:
            locale_encoding = locale.getpreferredencoding(False)
        else:
            locale_encoding = get_encoding()
        return [locale_encoding, "utf-8"]
    return ["utf-8"]
523+
504524# =============================================================================
505525# Patch sourcefile (instead of forking packages)
506526# =============================================================================
@@ -511,7 +531,8 @@ def patch_sourcefile(
511531 import io
512532
513533 if osp .isfile (fname ) and not in_text == out_text :
514- with io .open (fname , 'r' ) as fh :
534+ the_encoding = guess_encoding (fname )[0 ]
535+ with io .open (fname , 'r' , encoding = the_encoding ) as fh :
515536 content = fh .read ()
516537 new_content = content .replace (in_text , out_text )
517538 if not new_content == content :
@@ -524,7 +545,7 @@ def patch_sourcefile(
524545 "to" ,
525546 out_text ,
526547 )
527- with io .open (fname , 'wt' ) as fh :
548+ with io .open (fname , 'wt' , encoding = the_encoding ) as fh :
528549 fh .write (new_content )
529550
530551
@@ -543,7 +564,8 @@ def patch_sourcelines(
543564 import os .path as osp
544565
545566 if osp .isfile (fname ):
546- with io .open (fname , 'r' ) as fh :
567+ the_encoding = guess_encoding (fname )[0 ]
568+ with io .open (fname , 'r' , encoding = the_encoding ) as fh :
547569 contents = fh .readlines ()
548570 content = "" .join (contents )
549571 for l in range (len (contents )):
@@ -575,7 +597,7 @@ def patch_sourcelines(
575597 if not new_content == content :
576598 # if not silent_mode:
577599 # print("patching ", fname, "from", content, "to", new_content)
578- with io .open (fname , 'wt' ) as fh :
600+ with io .open (fname , 'wt' , encoding = the_encoding ) as fh :
579601 try :
580602 fh .write (new_content )
581603 except :
0 commit comments