B []9@sdZddlZddlZddlZddlZddlmZddlmZddl Z ddl Z ddl Z ddl m Z ddlmZmZmZmZmZmZddlmZmZddlZddlmZmZdd lmZmZm Z m!Z!m"Z"dd l#m$Z$eZ%e!Z&d d d ddddddddddddddhZ'e(eeeZ)e)*dGdddZ+ddZ,d d!Z-d"d#Z.d$d%Z/d&d'Z0d(d)Z1dDd*d+Z2d,d-Z3d.d/d0d1d2Z4d3d4Z5dEd7d8Z6Gd9d:d:ej7eZ8Gd;d<dd>e+Z:ej;d?fd@dAZc Cst|}t|rRt|}|jdd}|dkr2d}t|}||||dfSt|rxddl m }|j ||||dSt |rddl m }|j ||||dSt|tttjfrt|d|d fSt|sd } t| jt|d |d|d fS) aM If the filepath_or_buffer is a url, translate and return the buffer. Otherwise passthrough. Parameters ---------- filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path), or buffer compression : {{'gzip', 'bz2', 'zip', 'xz', None}}, optional encoding : the encoding to use to decode bytes, default is 'utf-8' mode : str, optional Returns ------- tuple of ({a filepath_ or buffer or S3File instance}, encoding, str, compression, str, should_close, bool) zContent-EncodingNgzipTr)r:)encoding compressionmode)r<Fz0Invalid file path or buffer object type: {_type})_type)r9r)r headersgetrreadcloser;Z pandas.ior:get_filepath_or_bufferr>r<r*r+bytesmmapr0r ValueErrorformattype) r/r@rArBreqcontent_encodingreaderr:r<msgrrrrHs.    rHcCstdt|S)z converts an absolute native path to a FILE URL. Parameters ---------- path : a path in native format Returns ------- a valid FILE URL zfile:)rr )r-rrrfile_path_to_urls rRz.gzz.bz2z.zipz.xz)r?bz2zipxzcCs|dkr dS|dkrRt|}t|ts*dSx"tD]\}}||r4|Sq4WdS|tkr^|Sd|}ddgtt}|d|7}t|dS)a Get the compression method for filepath_or_buffer. If compression='infer', the inferred compression method is returned. Otherwise, the input compression method is returned unchanged, unless it's invalid, in which case an error is raised. Parameters ---------- filepath_or_buffer : a path (str) or buffer compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None} If 'infer' and `filepath_or_buffer` is path-like, then detect compression from the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no compression). Returns ------- string or None : compression method Raises ------ ValueError on invalid compression specified NZinferz!Unrecognized compression type: {}z Valid compression types are {}) r9r*r+_compression_to_extensionitemsendswithrLsortedrK)r/rA extensionrQvalidrrr_infer_compressions    r\FTcCsHyddlm}t|f}Wntk r2tf}YnXt}|} t|}t|t} | r^t||}|rx|dkr| r~t ||} n t j |d} n|dkr| rt ||} n t |} n|dkr@t||} || | jdkr| } n^| jdkrl| } t| d kr| | } n,t| dkr0td |ntd | n,|d krZtt||} nd |} t| || nJ| r|rt |||dd} n"|rt ||ddd} n t ||} || |r|st| |rddlm}|| |dd} || |r@t| dr@yt| }| |} Wntk r>YnX| |fS)a Get file handle for given path/buffer and mode. Parameters ---------- path_or_buf : a path (str) or buffer mode : str mode to open path_or_buf with encoding : str or None compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default None If 'infer' and `filepath_or_buffer` is path-like, then detect compression from the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no compression). memory_map : boolean, default False See parsers._parser_params for more information. is_text : boolean, default True whether file/buffer is in text format (csv, json, etc.), or in binary mode (pickle, etc.) Returns ------- f : file-like A file-like object handles : list of file-like objects A list of file-like object that were opened in this function. r)S3Filer?)fileobjrSrTwrzZero files found in ZIP file {}z;Multiple files found in ZIP file. Only one file per ZIP: {}rUz!Unrecognized compression type: {}r)r@newlinereplace)errorsrb) TextIOWrapperfileno)Zs3fsr]r ImportErrorlistr9r*r+r\r?openGzipFilerSBZ2File BytesZipFileappendrBnamelistlenpoprKrLr lzmaiorer6 MMapWrapperrGr')Z path_or_bufrBr@rAZ memory_mapZis_textr]Zneed_text_wrappingZhandlesfZis_pathzfZ zip_namesrQregrrr _get_handle-sv                   rwcs>eZdZdZejffdd ZfddZeddZ Z S)rla  Wrapper for standard library class ZipFile and allow the returned file-like handle to accept byte strings via `write` method. BytesIO provides attributes of file-like object and ZipFile.writestr writes bytes strings into a member of the archive. c s,|dkr|dd}tj|||f|dS)N)wbrbbr)rcsuper__init__)rfilerBrAkwargs) __class__rrr|s zBytesZipFile.__init__cst|j|dS)N)r{writestrfilename)rdata)rrrwriteszBytesZipFile.writecCs |jdkS)N)fp)rrrrclosedszBytesZipFile.closed) r r!r"r#zipfile ZIP_DEFLATEDr|rpropertyr __classcell__rr)rrrls rlc@s0eZdZdZddZddZddZdd Zd S) rsa Wrapper for the Python's mmap class so that it can be properly read in by Python's csv.reader class. Parameters ---------- f : file object File object to be mapped onto memory. Must support the 'fileno' method or have an equivalent attribute cCstj|dtjd|_dS)Nr)access)rJrfZ ACCESS_READ)rrtrrrr|szMMapWrapper.__init__cCs t|j|S)N)getattrrJ)rnamerrr __getattr__szMMapWrapper.__getattr__cCs|S)Nr)rrrrrszMMapWrapper.__iter__cCs$|j}|d}|dkr t|S)Nzutf-8r)rJreadlinedecode StopIteration)rrbrrrrs   zMMapWrapper.__next__N)r r!r"r#r|rrrrrrrrss  rsc@s2eZdZdZddZd ddZddZd d Zd S) UTF8RecoderzQ Iterator that reads an encoded stream and re-encodes the input to UTF-8 cCst|||_dS)N)codecs getreaderrP)rrtr@rrrr|szUTF8Recoder.__init__cCs|j|dS)Nzutf-8)rPrFencode)rrIrrrrFszUTF8Recoder.readcCs|jdS)Nzutf-8)rPrr)rrrrrszUTF8Recoder.readlinecCst|jdS)Nzutf-8)nextrPr)rrrrrszUTF8Recoder.nextN)r)r r!r"r#r|rFrrrrrrrs  rzutf-8cKstj|fd|i|S)Ndialect)csvrP)rtrr@kwdsrrr UnicodeReadersrcKstj|fd|i|S)Nr)rwriter)rtrr@rrrr UnicodeWritersr)NNN)NNFT)>r#rSrrr? http.clientrrrrrJr,r7 urllib.errorr urllib.parserrrr$rr r urllib.requestr r rZ pandas.compatr rZ pandas.errorsrrrrrZpandas.core.dtypes.commonrrqZ CParserErrorZ _NA_VALUESsetr&discardrr)r0r4r9r;r>rHrRrVr\rwZipFilerlrsrexcelrrrrrrsn         97 %