first commit

This commit is contained in:
Carla Floricel
2022-08-02 09:52:52 -04:00
parent 417ea8660b
commit 05e52aa52b
10444 changed files with 2300232 additions and 0 deletions

View File

@@ -0,0 +1,27 @@
import pytest
# Warning filters for this test package: every filter spec below is wrapped
# in a ``pytest.mark.filterwarnings`` mark, in the order listed.
pytestmark = [
    pytest.mark.filterwarnings(warning_filter)
    for warning_filter in (
        # fastparquet
        "ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning",
        "ignore:Block.is_categorical is deprecated:DeprecationWarning",
        r"ignore:`np\.bool` is a deprecated alias:DeprecationWarning",
        # xlrd
        "ignore:This method will be removed in future versions:DeprecationWarning",
        (
            "ignore:This method will be removed in future versions. "
            r"Use 'tree.iter\(\)' or 'list\(tree.iter\(\)\)' instead."
            ":PendingDeprecationWarning"
        ),
        # GH 26552
        "ignore:As the xlwt package is no longer maintained:FutureWarning",
    )
]

View File

@@ -0,0 +1,214 @@
import os
import shlex
import subprocess
import time
import pytest
from pandas.compat import (
is_ci_environment,
is_platform_arm,
is_platform_mac,
is_platform_windows,
)
import pandas.util._test_decorators as td
import pandas._testing as tm
from pandas.io.parsers import read_csv
@pytest.fixture
def tips_file(datapath):
    """Location of the ``tips.csv`` dataset, as resolved by ``datapath``."""
    path_parts = ("io", "data", "csv", "tips.csv")
    return datapath(*path_parts)
@pytest.fixture
def jsonl_file(datapath):
    """Location of the ``items.jsonl`` dataset, as resolved by ``datapath``."""
    path_parts = ("io", "parser", "data", "items.jsonl")
    return datapath(*path_parts)
@pytest.fixture
def salaries_table(datapath):
    """DataFrame read from the tab-separated ``salaries.csv`` dataset."""
    salaries_path = datapath("io", "parser", "data", "salaries.csv")
    return read_csv(salaries_path, sep="\t")
@pytest.fixture
def feather_file(datapath):
    """Location of the ``feather-0_3_1.feather`` dataset, as resolved by ``datapath``."""
    path_parts = ("io", "data", "feather", "feather-0_3_1.feather")
    return datapath(*path_parts)
@pytest.fixture
def s3so(worker_id):
    """
    Storage options that point an S3 client at the test endpoint.

    Returns a dict of the form ``{"client_kwargs": {"endpoint_url": url}}``.
    In a CI environment the URL is ``http://localhost:5000/``; otherwise it
    is ``http://127.0.0.1:555<suffix>/`` where the suffix is derived from
    ``worker_id`` (presumably the pytest-xdist worker id - confirm).
    """
    if not is_ci_environment():
        # "master" maps to suffix "5"; any other id has its leading
        # "g"/"w" characters stripped (e.g. "gw3" -> "3").
        port_suffix = "5" if worker_id == "master" else worker_id.lstrip("gw")
        endpoint = f"http://127.0.0.1:555{port_suffix}/"
    else:
        endpoint = "http://localhost:5000/"
    return {"client_kwargs": {"endpoint_url": endpoint}}
@pytest.fixture(scope="session")
def s3_base(worker_id):
    """
    Fixture for mocking S3 interaction.

    Sets up moto server in separate process locally
    Return url for motoserver/moto CI service

    In a CI environment the fixture yields ``http://localhost:5000`` (or
    skips on Windows, MacOS and ARM). Otherwise it launches ``moto_server``
    on port ``555<suffix>``, polls it for up to roughly five seconds, yields
    the endpoint URL, and terminates the server on teardown.
    """
    pytest.importorskip("s3fs")
    pytest.importorskip("boto3")

    with tm.ensure_safe_environment_variables():
        # temporary workaround as moto fails for botocore >= 1.11 otherwise,
        # see https://github.com/spulec/moto/issues/1924 & 1952
        os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key")
        os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret")
        if is_ci_environment():
            if is_platform_arm() or is_platform_mac() or is_platform_windows():
                # NOT RUN on Windows/MacOS/ARM, only Ubuntu
                # - subprocess in CI can cause timeouts
                # - Github Actions do not support
                #   container services for the above OSs
                # - CircleCI will probably hit the Docker rate pull limit
                pytest.skip(
                    "S3 tests do not have a corresponding service in "
                    "Windows, MacOS or ARM platforms"
                )
            else:
                yield "http://localhost:5000"
        else:
            requests = pytest.importorskip("requests")
            pytest.importorskip("moto", minversion="1.3.14")
            pytest.importorskip("flask")  # server mode needs flask too

            # Launching moto in server mode, i.e., as a separate process
            # with an S3 endpoint on localhost

            # "master" maps to suffix "5"; any other id has its leading
            # "g"/"w" characters stripped.
            worker_id = "5" if worker_id == "master" else worker_id.lstrip("gw")
            endpoint_port = f"555{worker_id}"
            endpoint_uri = f"http://127.0.0.1:{endpoint_port}/"

            # pipe to null to avoid logging in terminal
            with subprocess.Popen(
                shlex.split(f"moto_server s3 -p {endpoint_port}"),
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            ) as proc:
                # Popen.__exit__ waits for the child to exit, so the server
                # must be terminated on every path out of this block;
                # otherwise an exception here would block forever.
                try:
                    timeout = 5
                    while timeout > 0:
                        try:
                            # OK to go once server is accepting connections
                            r = requests.get(endpoint_uri)
                            if r.ok:
                                break
                        except Exception:
                            # Best effort: the server is probably not up
                            # yet, keep polling until the budget runs out.
                            pass
                        timeout -= 0.1
                        time.sleep(0.1)
                    yield endpoint_uri
                finally:
                    proc.terminate()
@pytest.fixture
def s3_resource(s3_base, tips_file, jsonl_file, feather_file):
    """
    Sets up S3 bucket with contents

    The primary bucket name is "pandas-test". The following objects
    are uploaded to it.

    - tips#1.csv
    - tips.csv
    - tips.csv.gz
    - tips.csv.bz2
    - items.jsonl
    - simple_dataset.feather

    A private bucket "cant_get_it" is also created and receives the same
    objects. The boto3 s3 resource is yielded by the fixture; after the
    test both buckets are removed again on a best-effort basis.
    """
    import boto3
    import s3fs

    # (S3 key, local file to upload) pairs.
    test_s3_files = [
        ("tips#1.csv", tips_file),
        ("tips.csv", tips_file),
        ("tips.csv.gz", tips_file + ".gz"),
        ("tips.csv.bz2", tips_file + ".bz2"),
        ("items.jsonl", jsonl_file),
        ("simple_dataset.feather", feather_file),
    ]

    def add_tips_files(bucket_name):
        # Upload every entry of test_s3_files into the given bucket.
        # ``cli`` is bound further down, before this helper is first called.
        for s3_key, file_name in test_s3_files:
            with open(file_name, "rb") as f:
                cli.put_object(Bucket=bucket_name, Key=s3_key, Body=f)

    bucket = "pandas-test"
    conn = boto3.resource("s3", endpoint_url=s3_base)
    cli = boto3.client("s3", endpoint_url=s3_base)

    try:
        cli.create_bucket(Bucket=bucket)
    except Exception:
        # OK if the bucket already exists
        pass
    try:
        cli.create_bucket(Bucket="cant_get_it", ACL="private")
    except Exception:
        # OK if the bucket already exists
        pass
    # Wait (about two seconds at most) until the server lists a bucket.
    timeout = 2
    while not cli.list_buckets()["Buckets"] and timeout > 0:
        time.sleep(0.1)
        timeout -= 0.1

    add_tips_files(bucket)
    add_tips_files("cant_get_it")
    s3fs.S3FileSystem.clear_instance_cache()
    yield conn

    # Teardown: remove both buckets, ignoring any failure.
    s3 = s3fs.S3FileSystem(client_kwargs={"endpoint_url": s3_base})

    try:
        s3.rm(bucket, recursive=True)
    except Exception:
        pass
    try:
        s3.rm("cant_get_it", recursive=True)
    except Exception:
        pass
    # Wait (about two seconds at most) until no bucket is listed any more.
    timeout = 2
    while cli.list_buckets()["Buckets"] and timeout > 0:
        time.sleep(0.1)
        timeout -= 0.1
# Parameter values shared by the compression fixtures: each value is an
# (extension, compression name) pair; the first entry is skipped by
# ``compression_format``.
_compression_formats_params = [
    (".no_compress", None),
    ("", None),
    # Lower- then upper-case spelling of each extension.
    *(
        (extension, method)
        for method, extensions in (
            ("gzip", (".gz", ".GZ")),
            ("bz2", (".bz2", ".BZ2")),
            ("zip", (".zip", ".ZIP")),
            ("xz", (".xz", ".XZ")),
        )
        for extension in extensions
    ),
    # NOTE(review): presumably skipped when zstandard is not installed - confirm
    *(
        pytest.param((extension, "zstd"), marks=td.skip_if_no("zstandard"))
        for extension in (".zst", ".ZST")
    ),
]
@pytest.fixture(params=_compression_formats_params[1:])
def compression_format(request):
    """One parameter value from ``_compression_formats_params``, first entry excluded."""
    current_param = request.param
    return current_param
@pytest.fixture(params=_compression_formats_params)
def compression_ext(request):
    """First element (the extension) of each ``_compression_formats_params`` value."""
    current_param = request.param
    extension = current_param[0]
    return extension

View File

@@ -0,0 +1 @@
{'status': {'state': 'DONE'}, 'kind': 'bigquery#job', 'statistics': {'query': {'cacheHit': True, 'totalBytesProcessed': '0'}, 'endTime': '1377668744674', 'totalBytesProcessed': '0', 'startTime': '1377668744466'}, 'jobReference': {'projectId': '57288129629', 'jobId': 'bqjob_r5f956972f0190bdf_00000140c374bf42_2'}, 'etag': '"4PTsVxg68bQkQs1RJ1Ndewqkgg4/oO4VmgFrAku4N6FWci9s7iFIftc"', 'configuration': {'query': {'createDisposition': 'CREATE_IF_NEEDED', 'query': 'SELECT * FROM [publicdata:samples.shakespeare]', 'writeDisposition': 'WRITE_TRUNCATE', 'destinationTable': {'projectId': '57288129629', 'tableId': 'anonb5ec450da88eeeb78a27784ea482ee75a146d442', 'datasetId': '_d0b4f5f0d50dc68a3eb0fa6cba66a9a8687d9253'}}}, 'id': '57288129629:bqjob_r5f956972f0190bdf_00000140c374bf42_2', 'selfLink': 'https://www.googleapis.com/bigquery/v2/projects/57288129629/jobs/bqjob_r5f956972f0190bdf_00000140c374bf42_2'}

View File

@@ -0,0 +1,53 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<data>
<row>
<rank>1</rank>
<malename>José</malename>
<femalename>Sofía</femalename>
</row>
<row>
<rank>2</rank>
<malename>Luis</malename>
<femalename>Valentina</femalename>
</row>
<row>
<rank>3</rank>
<malename>Carlos</malename>
<femalename>Isabella</femalename>
</row>
<row>
<rank>4</rank>
<malename>Juan</malename>
<femalename>Camila</femalename>
</row>
<row>
<rank>5</rank>
<malename>Jorge</malename>
<femalename>Valeria</femalename>
</row>
<row>
<rank>6</rank>
<malename>Pedro</malename>
<femalename>Mariana</femalename>
</row>
<row>
<rank>7</rank>
<malename>Jesús</malename>
<femalename>Gabriela</femalename>
</row>
<row>
<rank>8</rank>
<malename>Manuel</malename>
<femalename>Sara</femalename>
</row>
<row>
<rank>9</rank>
<malename>Santiago</malename>
<femalename>Daniella</femalename>
</row>
<row>
<rank>10</rank>
<malename>Sebastián</malename>
<femalename>María José</femalename>
</row>
</data>

View File

@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<bookstore>
<book category="cooking">
<title lang="en">Everyday Italian</title>
<author>Giada De Laurentiis</author>
<year>2005</year>
<price>30.00</price>
</book>
<book category="children">
<title lang="en">Harry Potter</title>
<author>J K. Rowling</author>
<year>2005</year>
<price>29.99</price>
</book>
<book category="web">
<title lang="en">Learning XML</title>
<author>Erik T. Ray</author>
<year>2003</year>
<price>39.95</price>
</book>
</bookstore>

View File

@@ -0,0 +1,92 @@
<kml xmlns="http://www.opengis.net/kml/2.2"
xmlns:gx="http://www.google.com/kml/ext/2.2"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.opengis.net/kml/2.2 http://schemas.opengis.net/kml/2.2.0/ogckml22.xsd http://www.google.com/kml/ext/2.2 http://code.google.com/apis/kml/schema/kml22gx.xsd">
<Document id="CTA_RailLines">
<name>CTA_RailLines</name>
<Snippet/>
<Folder id="FeatureLayer0">
<name>CTA_RailLines</name>
<Snippet/>
<Placemark id="ID_00001">
<name>Blue Line (Forest Park)</name>
<Snippet/>
<description><![CDATA[<html xmlns:fo="http://www.w3.org/1999/XSL/Format" xmlns:msxsl="urn:schemas-microsoft-com:xslt"> <head> <META http-equiv="Content-Type" content="text/html"> <meta http-equiv="content-type" content="text/html; charset=UTF-8"> </head> <body style="margin:0px 0px 0px 0px;overflow:auto;background:#FFFFFF;"> <table style="font-family:Arial,Verdana,Times;font-size:12px;text-align:left;width:100%;border-collapse:collapse;padding:3px 3px 3px 3px"> <tr style="text-align:center;font-weight:bold;background:#9CBCE2"> <td>Blue Line (Forest Park)</td> </tr> <tr> <td> <table style="font-family:Arial,Verdana,Times;font-size:12px;text-align:left;width:100%;border-spacing:0px; padding:3px 3px 3px 3px"> <tr> <td>OBJECTID_1</td> <td>1</td> </tr> <tr bgcolor="#D4E4F3"> <td>ASSET_ID</td> <td>21100001</td> </tr> <tr> <td>LINES</td> <td>Blue Line (Forest Park)</td> </tr> <tr bgcolor="#D4E4F3"> <td>DESCRIPTIO</td> <td>Oak Park to Austin</td> </tr> <tr> <td>TYPE</td> <td>Elevated or at Grade</td> </tr> <tr bgcolor="#D4E4F3"> <td>LEGEND</td> <td>BL</td> </tr> <tr> <td>ALT_LEGEND</td> <td>BL</td> </tr> <tr bgcolor="#D4E4F3"> <td>BRANCH</td> <td>Blue Line Forest Park</td> </tr> <tr> <td>SHAPE.LEN</td> <td>4060.368778</td> </tr> </table> </td> </tr> </table> </body> </html>]]></description>
<styleUrl>#LineStyle01</styleUrl>
<MultiGeometry>
<LineString>
<extrude>0</extrude>
<altitudeMode>clampedToGround</altitudeMode>
<coordinates>-87.77678526964958,41.8708863930319,0 -87.77826234150609,41.87097820122218,0 -87.78251583439344,41.87130129991005,0 -87.78418294588424,41.87145055520308,0 -87.7872369165933,41.8717239119163,0 -87.79160214925886,41.87210797280065,0</coordinates>
</LineString>
</MultiGeometry>
</Placemark>
<Placemark id="ID_00002">
<name>Red, Purple Line</name>
<Snippet/>
<description><![CDATA[<html xmlns:fo="http://www.w3.org/1999/XSL/Format" xmlns:msxsl="urn:schemas-microsoft-com:xslt"> <head> <META http-equiv="Content-Type" content="text/html"> <meta http-equiv="content-type" content="text/html; charset=UTF-8"> </head> <body style="margin:0px 0px 0px 0px;overflow:auto;background:#FFFFFF;"> <table style="font-family:Arial,Verdana,Times;font-size:12px;text-align:left;width:100%;border-collapse:collapse;padding:3px 3px 3px 3px"> <tr style="text-align:center;font-weight:bold;background:#9CBCE2"> <td>Red, Purple Line</td> </tr> <tr> <td> <table style="font-family:Arial,Verdana,Times;font-size:12px;text-align:left;width:100%;border-spacing:0px; padding:3px 3px 3px 3px"> <tr> <td>OBJECTID_1</td> <td>2</td> </tr> <tr bgcolor="#D4E4F3"> <td>ASSET_ID</td> <td>21100002</td> </tr> <tr> <td>LINES</td> <td>Red, Purple Line</td> </tr> <tr bgcolor="#D4E4F3"> <td>DESCRIPTIO</td> <td>Lawrence to Wilson</td> </tr> <tr> <td>TYPE</td> <td>Elevated or at Grade</td> </tr> <tr bgcolor="#D4E4F3"> <td>LEGEND</td> <td>RD</td> </tr> <tr> <td>ALT_LEGEND</td> <td>RDPR</td> </tr> <tr bgcolor="#D4E4F3"> <td>BRANCH</td> <td>Red Line North Side</td> </tr> <tr> <td>SHAPE.LEN</td> <td>1800.132896</td> </tr> </table> </td> </tr> </table> </body> </html>]]></description>
<styleUrl>#LineStyle01</styleUrl>
<MultiGeometry>
<LineString>
<extrude>0</extrude>
<altitudeMode>clampedToGround</altitudeMode>
<coordinates>-87.65758750947528,41.96427269188822,0 -87.65802133507393,41.96581929055245,0 -87.65819033925305,41.96621846093642,0 -87.6583189819129,41.96650362897086,0 -87.65835858701473,41.96669002089185,0 -87.65838428411853,41.96688150295095,0 -87.65842208882658,41.96745896091846,0 -87.65846556843937,41.9683761425439,0 -87.65849296214573,41.96913893870342,0</coordinates>
</LineString>
</MultiGeometry>
</Placemark>
<Placemark id="ID_00003">
<name>Red, Purple Line</name>
<Snippet/>
<description><![CDATA[<html xmlns:fo="http://www.w3.org/1999/XSL/Format" xmlns:msxsl="urn:schemas-microsoft-com:xslt"> <head> <META http-equiv="Content-Type" content="text/html"> <meta http-equiv="content-type" content="text/html; charset=UTF-8"> </head> <body style="margin:0px 0px 0px 0px;overflow:auto;background:#FFFFFF;"> <table style="font-family:Arial,Verdana,Times;font-size:12px;text-align:left;width:100%;border-collapse:collapse;padding:3px 3px 3px 3px"> <tr style="text-align:center;font-weight:bold;background:#9CBCE2"> <td>Red, Purple Line</td> </tr> <tr> <td> <table style="font-family:Arial,Verdana,Times;font-size:12px;text-align:left;width:100%;border-spacing:0px; padding:3px 3px 3px 3px"> <tr> <td>OBJECTID_1</td> <td>3</td> </tr> <tr bgcolor="#D4E4F3"> <td>ASSET_ID</td> <td>21100003</td> </tr> <tr> <td>LINES</td> <td>Red, Purple Line</td> </tr> <tr bgcolor="#D4E4F3"> <td>DESCRIPTIO</td> <td>Wilson to Sheridan</td> </tr> <tr> <td>TYPE</td> <td>Elevated or at Grade</td> </tr> <tr bgcolor="#D4E4F3"> <td>LEGEND</td> <td>RD</td> </tr> <tr> <td>ALT_LEGEND</td> <td>RDPR</td> </tr> <tr bgcolor="#D4E4F3"> <td>BRANCH</td> <td>Red Line North Side</td> </tr> <tr> <td>SHAPE.LEN</td> <td>4256.243677</td> </tr> </table> </td> </tr> </table> </body> </html>]]></description>
<styleUrl>#LineStyle01</styleUrl>
<MultiGeometry>
<LineString>
<extrude>0</extrude>
<altitudeMode>clampedToGround</altitudeMode>
<coordinates>-87.65492939166126,41.95377494531437,0 -87.65557043199591,41.95376544118533,0 -87.65606302030132,41.95376391658746,0 -87.65623502146268,41.95377379126367,0 -87.65634748981634,41.95380103566435,0 -87.65646537904269,41.95387703994676,0 -87.65656532461145,41.95396622645799,0 -87.65664760856414,41.95404201996044,0 -87.65671750555913,41.95416647054043,0 -87.65673983607117,41.95429949810849,0 -87.65673866475777,41.95441024240925,0 -87.6567690255541,41.95490657227902,0 -87.65683672482363,41.95692259283837,0 -87.6568900886376,41.95861070983142,0 -87.65699865558875,41.96181418669004,0 -87.65756347177603,41.96397045777844,0 -87.65758750947528,41.96427269188822,0</coordinates>
</LineString>
</MultiGeometry>
</Placemark>
<Placemark id="ID_00004">
<name>Red, Purple Line</name>
<Snippet/>
<description><![CDATA[<html xmlns:fo="http://www.w3.org/1999/XSL/Format" xmlns:msxsl="urn:schemas-microsoft-com:xslt"> <head> <META http-equiv="Content-Type" content="text/html"> <meta http-equiv="content-type" content="text/html; charset=UTF-8"> </head> <body style="margin:0px 0px 0px 0px;overflow:auto;background:#FFFFFF;"> <table style="font-family:Arial,Verdana,Times;font-size:12px;text-align:left;width:100%;border-collapse:collapse;padding:3px 3px 3px 3px"> <tr style="text-align:center;font-weight:bold;background:#9CBCE2"> <td>Red, Purple Line</td> </tr> <tr> <td> <table style="font-family:Arial,Verdana,Times;font-size:12px;text-align:left;width:100%;border-spacing:0px; padding:3px 3px 3px 3px"> <tr> <td>OBJECTID_1</td> <td>4</td> </tr> <tr bgcolor="#D4E4F3"> <td>ASSET_ID</td> <td>21100004</td> </tr> <tr> <td>LINES</td> <td>Red, Purple Line</td> </tr> <tr bgcolor="#D4E4F3"> <td>DESCRIPTIO</td> <td>Sheridan to Addison</td> </tr> <tr> <td>TYPE</td> <td>Elevated or at Grade</td> </tr> <tr bgcolor="#D4E4F3"> <td>LEGEND</td> <td>RD</td> </tr> <tr> <td>ALT_LEGEND</td> <td>RDPR</td> </tr> <tr bgcolor="#D4E4F3"> <td>BRANCH</td> <td>Red Line North Side</td> </tr> <tr> <td>SHAPE.LEN</td> <td>2581.713736</td> </tr> </table> </td> </tr> </table> </body> </html>]]></description>
<styleUrl>#LineStyle01</styleUrl>
<MultiGeometry>
<LineString>
<extrude>0</extrude>
<altitudeMode>clampedToGround</altitudeMode>
<coordinates>-87.65362593118043,41.94742799535678,0 -87.65363554415794,41.94819886386848,0 -87.6536456393239,41.95059994675451,0 -87.65365831235026,41.95108288489359,0 -87.6536604873874,41.9519954657554,0 -87.65362592053201,41.95245597302328,0 -87.65367158496069,41.95311153649393,0 -87.65368468595476,41.9533202828916,0 -87.65369271253692,41.95343095587119,0 -87.65373335834569,41.95351536301472,0 -87.65378605844126,41.95358212680591,0 -87.65385067928185,41.95364452823767,0 -87.6539390793817,41.95370263886964,0 -87.6540786298351,41.95373403675265,0 -87.65430648647626,41.9537535411832,0 -87.65492939166126,41.95377494531437,0</coordinates>
</LineString>
</MultiGeometry>
</Placemark>
<Placemark id="ID_00005">
<name>Red, Purple Line</name>
<Snippet/>
<description><![CDATA[<html xmlns:fo="http://www.w3.org/1999/XSL/Format" xmlns:msxsl="urn:schemas-microsoft-com:xslt"> <head> <META http-equiv="Content-Type" content="text/html"> <meta http-equiv="content-type" content="text/html; charset=UTF-8"> </head> <body style="margin:0px 0px 0px 0px;overflow:auto;background:#FFFFFF;"> <table style="font-family:Arial,Verdana,Times;font-size:12px;text-align:left;width:100%;border-collapse:collapse;padding:3px 3px 3px 3px"> <tr style="text-align:center;font-weight:bold;background:#9CBCE2"> <td>Red, Purple Line</td> </tr> <tr> <td> <table style="font-family:Arial,Verdana,Times;font-size:12px;text-align:left;width:100%;border-spacing:0px; padding:3px 3px 3px 3px"> <tr> <td>OBJECTID_1</td> <td>5</td> </tr> <tr bgcolor="#D4E4F3"> <td>ASSET_ID</td> <td>21100005</td> </tr> <tr> <td>LINES</td> <td>Red, Purple Line</td> </tr> <tr bgcolor="#D4E4F3"> <td>DESCRIPTIO</td> <td>Addison to Clark Junction</td> </tr> <tr> <td>TYPE</td> <td>Elevated or at Grade</td> </tr> <tr bgcolor="#D4E4F3"> <td>LEGEND</td> <td>RD</td> </tr> <tr> <td>ALT_LEGEND</td> <td>RDPR</td> </tr> <tr bgcolor="#D4E4F3"> <td>BRANCH</td> <td>Red Line North Side</td> </tr> <tr> <td>SHAPE.LEN</td> <td>1918.716686</td> </tr> </table> </td> </tr> </table> </body> </html>]]></description>
<styleUrl>#LineStyle01</styleUrl>
<MultiGeometry>
<LineString>
<extrude>0</extrude>
<altitudeMode>clampedToGround</altitudeMode>
<coordinates>-87.65345391792157,41.94217681262115,0 -87.65342448305786,41.94237224420864,0 -87.65339745703922,41.94268217746244,0 -87.65337753982941,41.94288140770284,0 -87.65336256753105,41.94317369618263,0 -87.65338799707138,41.94357253961736,0 -87.65340240886648,41.94389158188269,0 -87.65341837392448,41.94406444407721,0 -87.65342275247338,41.94421065714904,0 -87.65347469646018,41.94434829382345,0 -87.65351486483024,41.94447699917548,0 -87.65353483605053,41.9453896864472,0 -87.65361975532807,41.94689193720703,0 -87.65362593118043,41.94742799535678,0</coordinates>
</LineString>
</MultiGeometry>
</Placemark>
</Folder>
<Style id="LineStyle01">
<LabelStyle>
<color>00000000</color>
<scale>0.000000</scale>
</LabelStyle>
<LineStyle>
<color>ff899e00</color>
<width>1.000000</width>
</LineStyle>
<PolyStyle>
<color>00000000</color>
<outline>0</outline>
</PolyStyle>
</Style>
</Document>
</kml>

View File

@@ -0,0 +1,18 @@
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:k="http://www.opengis.net/kml/2.2">
    <!-- Output settings: XML without a declaration, indented; k:description
         is declared as a CDATA-section element. -->
    <xsl:output method="xml" omit-xml-declaration="yes"
                cdata-section-elements="k:description" indent="yes"/>
    <xsl:strip-space elements="*"/>

    <!-- Identity template: copy every node and attribute unchanged unless a
         more specific template below matches. -->
    <xsl:template match="node()|@*">
        <xsl:copy>
            <xsl:apply-templates select="node()|@*"/>
        </xsl:copy>
    </xsl:template>

    <!-- Unwrap MultiGeometry and LineString: the element itself is dropped
         and only its child elements are processed. -->
    <xsl:template match="k:MultiGeometry|k:LineString">
        <xsl:apply-templates select='*'/>
    </xsl:template>

    <!-- Empty template: description, Snippet and Style elements produce no
         output. -->
    <xsl:template match="k:description|k:Snippet|k:Style"/>
</xsl:stylesheet>

View File

@@ -0,0 +1,19 @@
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <!-- Output settings: indented XML encoded as utf-8. -->
    <xsl:output method="xml" encoding="utf-8" indent="yes" />
    <xsl:strip-space elements="*"/>

    <!-- Identity template: copy every attribute and node unchanged unless a
         more specific template below matches. -->
    <xsl:template match="@*|node()">
        <xsl:copy>
            <xsl:apply-templates select="@*|node()"/>
        </xsl:copy>
    </xsl:template>

    <!-- Replace each child element of a row with a generic <field> element:
         the original element name goes into the "field" attribute and the
         element's text becomes the content. -->
    <xsl:template match="row/*">
        <field>
            <xsl:attribute name="field">
                <xsl:value-of select="name()"/>
            </xsl:attribute>
            <xsl:value-of select="text()"/>
        </field>
    </xsl:template>
</xsl:stylesheet>

View File

@@ -0,0 +1,35 @@
import pytest
from pandas.compat._optional import (
get_version,
import_optional_dependency,
)
from pandas.util.version import Version
# Warning filters for the excel tests: every filter spec below is wrapped in
# a ``pytest.mark.filterwarnings`` mark, in the order listed.
pytestmark = [
    pytest.mark.filterwarnings(warning_filter)
    for warning_filter in (
        # Looks like tree.getiterator is deprecated in favor of tree.iter
        (
            "ignore:This method will be removed in future versions:"
            "PendingDeprecationWarning"
        ),
        "ignore:This method will be removed in future versions:DeprecationWarning",
        # GH 26552
        "ignore:As the xlwt package is no longer maintained:FutureWarning",
        # GH 38571
        "ignore:.*In xlrd >= 2.0, only the xls format is supported:FutureWarning",
    )
]
# ``xlrd_version`` is the parsed version of xlrd when the optional dependency
# can be imported, and None otherwise.
if import_optional_dependency("xlrd", errors="ignore") is not None:
    import xlrd

    xlrd_version = Version(get_version(xlrd))
else:
    xlrd_version = None

View File

@@ -0,0 +1,67 @@
import pytest
from pandas.compat import is_platform_windows
import pandas.util._test_decorators as td
import pandas._testing as tm
from pandas.io.parsers import read_csv
@pytest.fixture
def frame(float_frame):
    """
    The ``[:10]`` slice of the "float_frame" fixture (its first ten items).
    """
    first_ten = float_frame[:10]
    return first_ten
@pytest.fixture
def tsframe():
    """
    The ``[:5]`` slice of the frame built by ``tm.makeTimeDataFrame()``.
    """
    time_frame = tm.makeTimeDataFrame()
    return time_frame[:5]
@pytest.fixture(params=[True, False])
def merge_cells(request):
    """
    Boolean flag parametrized over True and False.
    """
    flag = request.param
    return flag
@pytest.fixture
def df_ref(datapath):
    """
    Reference frame: ``test1.csv`` parsed by read_csv with the Python engine,
    using the first column as index and parsing dates.
    """
    csv_path = datapath("io", "data", "csv", "test1.csv")
    return read_csv(csv_path, index_col=0, parse_dates=True, engine="python")
@pytest.fixture(params=[".xls", ".xlsx", ".xlsm", ".ods", ".xlsb"])
def read_ext(request):
    """
    File extension of an Excel file to read, one per parametrization.
    """
    extension = request.param
    return extension
# Checking for file leaks can hang on Windows CI
@pytest.fixture(autouse=not is_platform_windows())
def check_for_file_leaks():
    """
    Wrap each test and assert that the process has the same open files
    afterwards as before. Does nothing when psutil cannot be imported.

    See also
    --------
    _test_decorators.check_file_leaks
    """
    # GH#30162
    psutil = td.safe_import("psutil")
    if psutil:
        current_process = psutil.Process()
        open_before = current_process.open_files()
        yield
        open_after = current_process.open_files()
        assert open_before == open_after
    else:
        # psutil is unavailable: just run the test.
        yield

View File

@@ -0,0 +1,38 @@
import functools
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
# Module-level guard: the "odf" package must be importable for these tests.
pytest.importorskip("odf")
@pytest.fixture(autouse=True)
def cd_and_set_engine(monkeypatch, datapath):
    """
    Patch ``pd.read_excel`` to default to the "odf" engine and change the
    working directory to the excel data directory, for every test here.
    """
    odf_read_excel = functools.partial(pd.read_excel, engine="odf")
    monkeypatch.setattr(pd, "read_excel", odf_read_excel)
    excel_data_dir = datapath("io", "data", "excel")
    monkeypatch.chdir(excel_data_dir)
def test_read_invalid_types_raises():
    """Reading a cell with an unknown value type raises ValueError."""
    # the invalid_value_type.ods required manually editing
    # of the included content.xml file
    expected_msg = "Unrecognized type awesome_new_type"
    with pytest.raises(ValueError, match=expected_msg):
        pd.read_excel("invalid_value_type.ods")
def test_read_writer_table():
    """Read table "Table1" from ``writertable.odt`` and compare to the expected frame."""
    # Also test reading tables from a text OpenDocument file
    # (.odt)
    row_labels = pd.Index(["Row 1", "Row 2", "Row 3"], name="Header")
    rows = [[1, np.nan, 7], [2, np.nan, 8], [3, np.nan, 9]]
    expected = pd.DataFrame(
        rows,
        index=row_labels,
        columns=["Column 1", "Unnamed: 2", "Column 3"],
    )

    actual = pd.read_excel("writertable.odt", sheet_name="Table1", index_col=0)

    tm.assert_frame_equal(actual, expected)

View File

@@ -0,0 +1,58 @@
import re
import pytest
import pandas._testing as tm
from pandas.io.excel import ExcelWriter
# Module-level guard: the "odf" package must be importable for these tests.
odf = pytest.importorskip("odf")

# Every test in this module receives ``ext`` with the single value ".ods".
pytestmark = pytest.mark.parametrize("ext", [".ods"])
def test_write_append_mode_raises(ext):
    """Opening an odf ExcelWriter with mode="a" raises ValueError."""
    expected_msg = "Append mode is not supported with odf!"

    with tm.ensure_clean(ext) as path, pytest.raises(ValueError, match=expected_msg):
        ExcelWriter(path, engine="odf", mode="a")
def test_kwargs(ext):
    """Passing **kwargs to the odf writer warns and raises TypeError."""
    # GH 42286
    # GH 43445
    # test for error: OpenDocumentSpreadsheet does not accept any arguments
    warning_msg = re.escape("Use of **kwargs is deprecated")
    error_msg = re.escape(
        "OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'"
    )
    extra_kwargs = {"kwarg": 1}

    with tm.ensure_clean(ext) as path:
        with pytest.raises(TypeError, match=error_msg):
            with tm.assert_produces_warning(FutureWarning, match=warning_msg):
                with ExcelWriter(path, engine="odf", **extra_kwargs) as _:
                    pass
@pytest.mark.parametrize("engine_kwargs", [None, {"kwarg": 1}])
def test_engine_kwargs(ext, engine_kwargs):
    """engine_kwargs=None works; any actual engine kwarg raises TypeError."""
    # GH 42286
    # GH 43445
    # test for error: OpenDocumentSpreadsheet does not accept any arguments
    with tm.ensure_clean(ext) as path:
        if engine_kwargs is None:
            with ExcelWriter(path, engine="odf", engine_kwargs=engine_kwargs) as _:
                pass
        else:
            error_msg = re.escape(
                "OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'"
            )
            with pytest.raises(TypeError, match=error_msg):
                ExcelWriter(path, engine="odf", engine_kwargs=engine_kwargs)

View File

@@ -0,0 +1,385 @@
from pathlib import Path
import re
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame
import pandas._testing as tm
from pandas.io.excel import (
ExcelWriter,
_OpenpyxlWriter,
)
# Module-level guard: the "openpyxl" package must be importable for these tests.
openpyxl = pytest.importorskip("openpyxl")

# Every test in this module receives ``ext`` with the single value ".xlsx".
pytestmark = pytest.mark.parametrize("ext", [".xlsx"])
def test_to_excel_styleconverter(ext):
    """``_convert_to_style_kwargs`` maps a style dict to equal openpyxl style objects."""
    from openpyxl import styles

    hstyle = {
        "font": {"color": "00FF0000", "bold": True},
        "borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"},
        "alignment": {"horizontal": "center", "vertical": "top"},
        "fill": {"patternType": "solid", "fgColor": {"rgb": "006666FF", "tint": 0.3}},
        "number_format": {"format_code": "0.00"},
        "protection": {"locked": True, "hidden": False},
    }

    # Expected openpyxl objects, keyed by the kwarg name the converter emits
    # (note "borders" in the input becomes "border" in the output).
    thin_side = styles.Side(style=styles.borders.BORDER_THIN)
    expected = {
        "font": styles.Font(bold=True, color=styles.Color("00FF0000")),
        "border": styles.Border(
            top=thin_side, right=thin_side, bottom=thin_side, left=thin_side
        ),
        "alignment": styles.Alignment(horizontal="center", vertical="top"),
        "fill": styles.PatternFill(
            patternType="solid", fgColor=styles.Color(rgb="006666FF", tint=0.3)
        ),
        "number_format": "0.00",
        "protection": styles.Protection(locked=True, hidden=False),
    }

    converted = _OpenpyxlWriter._convert_to_style_kwargs(hstyle)
    for kwarg_name, wanted in expected.items():
        assert converted[kwarg_name] == wanted
def test_write_cells_merge_styled(ext):
    """Writing a styled merged cell applies its font to the cells it covers."""
    from pandas.io.formats.excel import ExcelCell

    sheet_name = "merge_styled"

    style_b1 = {"font": {"color": "00FF0000"}}
    style_a2 = {"font": {"color": "0000FF00"}}
    first_pass = [
        ExcelCell(col=1, row=0, val=42, style=style_b1),
        ExcelCell(col=0, row=1, val=99, style=style_a2),
    ]

    style_merged = {"font": {"color": "000000FF", "bold": True}}
    expected_font = _OpenpyxlWriter._convert_to_style_kwargs(style_merged)["font"]
    second_pass = [
        ExcelCell(
            col=0, row=0, val="pandas", mergestart=1, mergeend=1, style=style_merged
        )
    ]

    with tm.ensure_clean(ext) as path:
        with _OpenpyxlWriter(path) as writer:
            # The merged cell is written after the individually styled ones.
            for cells in (first_pass, second_pass):
                writer.write_cells(cells, sheet_name=sheet_name)

            wks = writer.sheets[sheet_name]
        cell_b1 = wks["B1"]
        cell_a2 = wks["A2"]
        for cell in (cell_b1, cell_a2):
            assert cell.font == expected_font
@pytest.mark.parametrize("iso_dates", [True, False])
def test_kwargs(ext, iso_dates):
    """Deprecated **kwargs still reach the workbook, with a FutureWarning."""
    # GH 42286 GH 43445
    legacy_kwargs = {"iso_dates": iso_dates}
    with tm.ensure_clean(ext) as path:
        warning_msg = re.escape("Use of **kwargs is deprecated")
        with tm.assert_produces_warning(FutureWarning, match=warning_msg):
            with ExcelWriter(path, engine="openpyxl", **legacy_kwargs) as writer:
                workbook_flag = writer.book.iso_dates
                assert workbook_flag == iso_dates
                # ExcelWriter won't allow us to close without writing something
                DataFrame().to_excel(writer)
@pytest.mark.parametrize("iso_dates", [True, False])
def test_engine_kwargs_write(ext, iso_dates):
    """engine_kwargs given in write mode are reflected on the workbook."""
    # GH 42286 GH 43445
    with tm.ensure_clean(ext) as path:
        with ExcelWriter(
            path, engine="openpyxl", engine_kwargs={"iso_dates": iso_dates}
        ) as writer:
            workbook_flag = writer.book.iso_dates
            assert workbook_flag == iso_dates
            # ExcelWriter won't allow us to close without writing something
            DataFrame().to_excel(writer)
def test_engine_kwargs_append_invalid(ext):
    """An unknown engine kwarg in append mode raises TypeError."""
    # GH 43445
    # test whether an invalid engine kwargs actually raises
    error_msg = re.escape(
        "load_workbook() got an unexpected keyword argument 'apple_banana'"
    )
    bad_engine_kwargs = {"apple_banana": "fruit"}
    with tm.ensure_clean(ext) as path:
        DataFrame(["hello", "world"]).to_excel(path)
        with pytest.raises(TypeError, match=error_msg):
            with ExcelWriter(
                path, engine="openpyxl", mode="a", engine_kwargs=bad_engine_kwargs
            ) as writer:
                # ExcelWriter needs us to write something to close properly
                DataFrame(["good"]).to_excel(writer, sheet_name="Sheet2")
@pytest.mark.parametrize("data_only, expected", [(True, 0), (False, "=1+1")])
def test_engine_kwargs_append_data_only(ext, data_only, expected):
    """The data_only engine kwarg controls what cell B2 holds in append mode."""
    # GH 43445
    # tests whether the data_only engine_kwarg actually works well for
    # openpyxl's load_workbook
    with tm.ensure_clean(ext) as path:
        DataFrame(["=1+1"]).to_excel(path)
        with ExcelWriter(
            path, engine="openpyxl", mode="a", engine_kwargs={"data_only": data_only}
        ) as writer:
            formula_cell = writer.sheets["Sheet1"]["B2"]
            assert formula_cell.value == expected
            # ExcelWriter needs us to write something to close properly?
            DataFrame().to_excel(writer, sheet_name="Sheet2")
@pytest.mark.parametrize(
    "mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])]
)
def test_write_append_mode(ext, mode, expected):
    """Mode "w" replaces the existing sheets; mode "a" keeps them and appends."""
    df = DataFrame([1], columns=["baz"])

    with tm.ensure_clean(ext) as path:
        # Seed the file with two sheets whose A1 cell holds the sheet name.
        seed_book = openpyxl.Workbook()
        seed_book.worksheets[0].title = "foo"
        seed_book.worksheets[0]["A1"].value = "foo"
        seed_book.create_sheet("bar")
        seed_book.worksheets[1]["A1"].value = "bar"
        seed_book.save(path)

        with ExcelWriter(path, engine="openpyxl", mode=mode) as writer:
            df.to_excel(writer, sheet_name="baz", index=False)

        reloaded = openpyxl.load_workbook(path)
        sheet_titles = [sheet.title for sheet in reloaded.worksheets]
        assert sheet_titles == expected

        for position, cell_value in enumerate(expected):
            first_cell = reloaded.worksheets[position]["A1"]
            assert first_cell.value == cell_value
@pytest.mark.parametrize(
    "if_sheet_exists,num_sheets,expected",
    [
        ("new", 2, ["apple", "banana"]),
        ("replace", 1, ["pear"]),
        ("overlay", 1, ["pear", "banana"]),
    ],
)
def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected):
    """
    Appending to an existing sheet honours ``if_sheet_exists``.

    Sheet "foo" is first written with ``df1`` and then ``df2`` is appended
    under the same name. The test checks the resulting number of sheets, the
    contents of "foo" and, when a second sheet exists, that it equals ``df2``.
    """
    # GH 40230
    df1 = DataFrame({"fruit": ["apple", "banana"]})
    df2 = DataFrame({"fruit": ["pear"]})

    with tm.ensure_clean(ext) as f:
        df1.to_excel(f, engine="openpyxl", sheet_name="foo", index=False)
        with ExcelWriter(
            f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
        ) as writer:
            df2.to_excel(writer, sheet_name="foo", index=False)

        wb = openpyxl.load_workbook(f)
        # Close the workbook even when one of the checks below fails.
        try:
            assert len(wb.sheetnames) == num_sheets
            assert wb.sheetnames[0] == "foo"
            result = pd.read_excel(wb, "foo", engine="openpyxl")
            assert list(result["fruit"]) == expected
            if len(wb.sheetnames) == 2:
                result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
                tm.assert_frame_equal(result, df2)
        finally:
            wb.close()
@pytest.mark.parametrize(
    "startrow, startcol, greeting, goodbye",
    [
        (0, 0, ["poop", "world"], ["goodbye", "people"]),
        (0, 1, ["hello", "world"], ["poop", "people"]),
        (1, 0, ["hello", "poop"], ["goodbye", "people"]),
        (1, 1, ["hello", "world"], ["goodbye", "poop"]),
    ],
)
def test_append_overlay_startrow_startcol(ext, startrow, startcol, greeting, goodbye):
    """Overlay mode overwrites only the cell addressed by startrow/startcol."""
    base = DataFrame({"greeting": ["hello", "world"], "goodbye": ["goodbye", "people"]})
    patch = DataFrame(["poop"])
    expected = DataFrame({"greeting": greeting, "goodbye": goodbye})

    with tm.ensure_clean(ext) as path:
        base.to_excel(path, engine="openpyxl", sheet_name="poo", index=False)
        with ExcelWriter(
            path, engine="openpyxl", mode="a", if_sheet_exists="overlay"
        ) as writer:
            # use startrow+1 because we don't have a header
            data_row = startrow + 1
            patch.to_excel(
                writer,
                index=False,
                header=False,
                startrow=data_row,
                startcol=startcol,
                sheet_name="poo",
            )

        actual = pd.read_excel(path, sheet_name="poo", engine="openpyxl")
        tm.assert_frame_equal(actual, expected)
@pytest.mark.parametrize(
    "if_sheet_exists,msg",
    [
        (
            "invalid",
            "'invalid' is not valid for if_sheet_exists. Valid options "
            "are 'error', 'new', 'replace' and 'overlay'.",
        ),
        (
            "error",
            "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
        ),
        (
            None,
            "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
        ),
    ],
)
def test_if_sheet_exists_raises(ext, if_sheet_exists, msg):
    """
    Appending to an existing sheet raises ValueError with ``msg`` (GH 40230).

    Covers an invalid ``if_sheet_exists`` value, the explicit "error" mode
    and ``None``, which is expected to produce the same message as "error".
    """
    df = DataFrame({"fruit": ["pear"]})
    with tm.ensure_clean(ext) as f:
        # Create the workbook outside ``pytest.raises`` so that only the
        # append step is allowed to produce the expected error.
        df.to_excel(f, sheet_name="foo", engine="openpyxl")
        with pytest.raises(ValueError, match=re.escape(msg)):
            with ExcelWriter(
                f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
            ) as writer:
                df.to_excel(writer, sheet_name="foo")
def test_to_excel_with_openpyxl_engine(ext):
    """Write a styled two-column frame with the openpyxl engine (GH 29854)."""

    def font_color(val):
        # negative values in red, everything else in black
        return "color: %s" % ("red" if val < 0 else "black")

    left = DataFrame({"A": np.linspace(1, 10, 10)})
    right = DataFrame({"B": np.linspace(1, 20, 10)})
    combined = pd.concat([left, right], axis=1)
    styled = combined.style.applymap(font_color).highlight_max()

    with tm.ensure_clean(ext) as filename:
        # the test passes as long as the write does not raise
        styled.to_excel(filename, engine="openpyxl")
@pytest.mark.parametrize("read_only", [True, False])
def test_read_workbook(datapath, ext, read_only):
    """
    ``read_excel`` accepts an already-loaded openpyxl workbook (GH 39528).

    The frame read from the workbook object must equal the frame read
    directly from the file path.
    """
    filename = datapath("io", "data", "excel", "test1" + ext)
    wb = openpyxl.load_workbook(filename, read_only=read_only)
    try:
        result = pd.read_excel(wb, engine="openpyxl")
    finally:
        # close the workbook even if read_excel raises
        wb.close()
    expected = pd.read_excel(filename)
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "header, expected_data",
    [
        (
            0,
            {
                "Title": [np.nan, "A", 1, 2, 3],
                "Unnamed: 1": [np.nan, "B", 4, 5, 6],
                "Unnamed: 2": [np.nan, "C", 7, 8, 9],
            },
        ),
        (2, {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}),
    ],
)
@pytest.mark.parametrize(
    "filename", ["dimension_missing", "dimension_small", "dimension_large"]
)
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_with_bad_dimension(
    datapath, ext, header, expected_data, filename, read_only, request
):
    """
    Read files with missing or incorrect dimension information.

    GH 38956, 39001.  The same data must be returned whether the file is read
    from its path (``read_only is None``) or from a pre-loaded workbook.
    ``request`` is accepted but not used by the body.
    """
    path = datapath("io", "data", "excel", f"{filename}{ext}")
    if read_only is None:
        result = pd.read_excel(path, header=header)
    else:
        wb = openpyxl.load_workbook(path, read_only=read_only)
        try:
            result = pd.read_excel(wb, engine="openpyxl", header=header)
        finally:
            # close the workbook even if read_excel raises
            wb.close()
    expected = DataFrame(expected_data)
    tm.assert_frame_equal(result, expected)
def test_append_mode_file(ext):
    """Appending must rewrite the file, not concatenate a second zip (GH 39576)."""
    empty = DataFrame()
    marker = b"docProps/app.xml"

    with tm.ensure_clean(ext) as path:
        empty.to_excel(path, engine="openpyxl")

        with ExcelWriter(
            path, mode="a", engine="openpyxl", if_sheet_exists="new"
        ) as writer:
            empty.to_excel(writer)

        # make sure that zip files are not concatenated by making sure that
        # "docProps/app.xml" only occurs twice in the file; the marker cannot
        # overlap itself, so counting matches the original find() chain
        raw = Path(path).read_bytes()
        assert raw.count(marker) == 2
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_with_empty_trailing_rows(datapath, ext, read_only, request):
    """
    Read a file that ends with empty rows (GH 39181).

    The expected frame contains only the populated rows, whether the file is
    read from its path or from a pre-loaded workbook.  ``request`` is
    accepted but not used by the body.
    """
    path = datapath("io", "data", "excel", f"empty_trailing_rows{ext}")
    if read_only is None:
        result = pd.read_excel(path)
    else:
        wb = openpyxl.load_workbook(path, read_only=read_only)
        try:
            result = pd.read_excel(wb, engine="openpyxl")
        finally:
            # close the workbook even if read_excel raises
            wb.close()
    expected = DataFrame(
        {
            "Title": [np.nan, "A", 1, 2, 3],
            "Unnamed: 1": [np.nan, "B", 4, 5, 6],
            "Unnamed: 2": [np.nan, "C", 7, 8, 9],
        }
    )
    tm.assert_frame_equal(result, expected)
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_empty_with_blank_row(datapath, ext, read_only):
    """
    Read an empty excel file with a row that has no data (GH 39547).

    The result must be an empty DataFrame for every way of opening the file.
    """
    path = datapath("io", "data", "excel", f"empty_with_blank_row{ext}")
    if read_only is None:
        result = pd.read_excel(path)
    else:
        wb = openpyxl.load_workbook(path, read_only=read_only)
        try:
            result = pd.read_excel(wb, engine="openpyxl")
        finally:
            # close the workbook even if read_excel raises
            wb.close()
    expected = DataFrame()
    tm.assert_frame_equal(result, expected)
def test_ints_spelled_with_decimals(datapath, ext):
    """
    Integers stored with a decimal part are read back as integers.

    GH 46988 - openpyxl returns this sheet with floats.
    """
    workbook_path = datapath(
        "io", "data", "excel", f"ints_spelled_with_decimals{ext}"
    )
    expected = DataFrame(range(2, 12), columns=[1])
    result = pd.read_excel(workbook_path)
    tm.assert_frame_equal(result, expected)

View File

@@ -0,0 +1,167 @@
import numpy as np
import pytest
from pandas import DataFrame
import pandas._testing as tm
from pandas.io.excel import ExcelWriter
from pandas.io.formats.excel import ExcelFormatter
pytest.importorskip("jinja2")
# jinja2 is currently required for Styler.__init__(). Technically Styler.to_excel
# could compute styles and render to excel without jinja2, since there is no
# 'template' file, but this needs the import error to be delayed until render
# time.
def assert_equal_cell_styles(cell1, cell2):
    """
    Assert that two cells carry the same style.

    The alignment, border, fill, font and protection objects are compared
    through their instance dictionaries; ``number_format`` is compared
    directly.
    """
    # TODO: should find a better way to check equality
    for style_attr in ("alignment", "border", "fill", "font"):
        assert vars(getattr(cell1, style_attr)) == vars(getattr(cell2, style_attr))
    assert cell1.number_format == cell2.number_format
    assert vars(cell1.protection) == vars(cell2.protection)
@pytest.mark.parametrize(
    "engine",
    ["xlsxwriter", "openpyxl"],
)
def test_styler_to_excel_unstyled(engine):
    """Compare DataFrame.to_excel and Styler.to_excel when no styles applied."""
    pytest.importorskip(engine)
    frame = DataFrame(np.random.randn(2, 2))
    with tm.ensure_clean(".xlsx") as path:
        with ExcelWriter(path, engine=engine) as writer:
            frame.to_excel(writer, sheet_name="dataframe")
            frame.style.to_excel(writer, sheet_name="unstyled")

        # test loading only with openpyxl
        openpyxl = pytest.importorskip("openpyxl")
        workbook = openpyxl.load_workbook(path)
        plain_columns = workbook["dataframe"].columns
        styler_columns = workbook["unstyled"].columns

        for plain_col, styler_col in zip(plain_columns, styler_columns):
            assert len(plain_col) == len(styler_col)
            for plain_cell, styler_cell in zip(plain_col, styler_col):
                assert plain_cell.value == styler_cell.value
                assert_equal_cell_styles(plain_cell, styler_cell)
# Parametrization shared by the basic Styler-to-Excel tests below.  Each entry
# is ``(css, attrs, expected)``:
#   css      - the CSS declaration applied through the Styler
#   attrs    - chain of attribute names followed from the openpyxl cell
#   expected - value found at the end of that chain; a dict when the value
#              differs per writer engine (keyed by engine name)
shared_style_params = [
    (
        "background-color: #111222",
        ["fill", "fgColor", "rgb"],
        {"xlsxwriter": "FF111222", "openpyxl": "00111222"},
    ),
    (
        "color: #111222",
        ["font", "color", "value"],
        {"xlsxwriter": "FF111222", "openpyxl": "00111222"},
    ),
    ("font-family: Arial;", ["font", "name"], "arial"),
    ("font-weight: bold;", ["font", "b"], True),
    ("font-style: italic;", ["font", "i"], True),
    ("text-decoration: underline;", ["font", "u"], "single"),
    ("number-format: $??,???.00;", ["number_format"], "$??,???.00"),
    ("text-align: left;", ["alignment", "horizontal"], "left"),
    (
        "vertical-align: bottom;",
        ["alignment", "vertical"],
        {"xlsxwriter": None, "openpyxl": "bottom"},  # xlsxwriter Fails
    ),
]
@pytest.mark.parametrize(
    "engine",
    ["xlsxwriter", "openpyxl"],
)
@pytest.mark.parametrize("css, attrs, expected", shared_style_params)
def test_styler_to_excel_basic(engine, css, attrs, expected):
    """
    A CSS declaration applied to a data cell shows up in the written workbook.

    The same frame is written unstyled and styled; only the styled data cell
    may carry the expected attribute value.
    """
    pytest.importorskip(engine)
    frame = DataFrame(np.random.randn(1, 1))
    styled = frame.style.applymap(lambda x: css)
    # a dict holds one expected value per writer engine
    wanted = expected[engine] if isinstance(expected, dict) else expected

    with tm.ensure_clean(".xlsx") as path:
        with ExcelWriter(path, engine=engine) as writer:
            frame.to_excel(writer, sheet_name="dataframe")
            styled.to_excel(writer, sheet_name="styled")

        # test loading only with openpyxl
        openpyxl = pytest.importorskip("openpyxl")
        workbook = openpyxl.load_workbook(path)

        # walk the attribute chain on both the unstyled and the styled cell
        plain_value = workbook["dataframe"].cell(2, 2)
        styled_value = workbook["styled"].cell(2, 2)
        for attr in attrs:
            plain_value = getattr(plain_value, attr)
            styled_value = getattr(styled_value, attr)

        # test unstyled data cell does not have expected styles
        assert plain_value is None or plain_value != wanted
        # test styled cell has expected styles
        assert styled_value == wanted
@pytest.mark.parametrize(
    "engine",
    ["xlsxwriter", "openpyxl"],
)
@pytest.mark.parametrize("css, attrs, expected", shared_style_params)
def test_styler_to_excel_basic_indexes(engine, css, attrs, expected):
    """
    A CSS declaration applied to index and column labels reaches the workbook.

    A second Styler using a "null: css;" declaration everywhere provides the
    cells that must not carry the expected attribute value.
    """
    pytest.importorskip(engine)
    frame = DataFrame(np.random.randn(1, 1))

    styled = frame.style
    for axis in (0, 1):
        styled.applymap_index(lambda x: css, axis=axis)

    null_styled = frame.style
    null_styled.applymap(lambda x: "null: css;")
    for axis in (0, 1):
        null_styled.applymap_index(lambda x: "null: css;", axis=axis)

    # a dict holds one expected value per writer engine
    wanted = expected[engine] if isinstance(expected, dict) else expected

    with tm.ensure_clean(".xlsx") as path:
        with ExcelWriter(path, engine=engine) as writer:
            null_styled.to_excel(writer, sheet_name="null_styled")
            styled.to_excel(writer, sheet_name="styled")

        # test loading only with openpyxl
        openpyxl = pytest.importorskip("openpyxl")
        workbook = openpyxl.load_workbook(path)
        null_sheet, styled_sheet = workbook["null_styled"], workbook["styled"]

        # cell (2, 1) is the index label, cell (1, 2) is the column label
        null_index, styled_index = null_sheet.cell(2, 1), styled_sheet.cell(2, 1)
        null_column, styled_column = null_sheet.cell(1, 2), styled_sheet.cell(1, 2)
        for attr in attrs:
            null_index = getattr(null_index, attr)
            styled_index = getattr(styled_index, attr)
            null_column = getattr(null_column, attr)
            styled_column = getattr(styled_column, attr)

        # test null styled index cells does not have expected styles
        # test styled cell has expected styles
        assert null_index is None or null_index != wanted
        assert styled_index == wanted
        assert null_column is None or null_column != wanted
        assert styled_column == wanted
def test_styler_custom_converter():
    """A ``style_converter`` passed to ExcelFormatter overrides the Styler's CSS."""
    openpyxl = pytest.importorskip("openpyxl")

    def fixed_font_color(css):
        # ignores the incoming CSS and always returns the same font colour
        return {"font": {"color": {"rgb": "111222"}}}

    frame = DataFrame(np.random.randn(1, 1))
    styled = frame.style.applymap(lambda x: "color: #888999")
    with tm.ensure_clean(".xlsx") as path:
        with ExcelWriter(path, engine="openpyxl") as writer:
            formatter = ExcelFormatter(styled, style_converter=fixed_font_color)
            formatter.write(writer, sheet_name="custom")

        workbook = openpyxl.load_workbook(path)
        data_cell = workbook["custom"].cell(2, 2)
        assert data_cell.font.color.value == "00111222"

View File

@@ -0,0 +1,109 @@
import io
import pytest
from pandas.compat._optional import import_optional_dependency
import pandas as pd
import pandas._testing as tm
from pandas.tests.io.excel import xlrd_version
from pandas.util.version import Version
from pandas.io.excel import ExcelFile
from pandas.io.excel._base import inspect_excel_format
xlrd = pytest.importorskip("xlrd")
xlwt = pytest.importorskip("xlwt")
# Module-level mark: ignore the xlwt "no longer maintained" FutureWarning.
pytestmark = pytest.mark.filterwarnings(
    "ignore:As the xlwt package is no longer maintained:FutureWarning"
)

# Extensions exercised by the ``read_ext_xlrd`` fixture: only ".xls" when
# xlrd is version 2 or newer.
# error: Unsupported operand types for >= ("Version" and "None")
if xlrd_version >= Version("2"):  # type: ignore[operator]
    exts = [".xls"]
else:
    exts = [".xls", ".xlsx", ".xlsm"]
@pytest.fixture(params=exts)
def read_ext_xlrd(request):
    """
    Valid extensions for reading Excel files with xlrd.

    Similar to read_ext, but excludes .ods, .xlsb, and for xlrd>=2 .xlsx, .xlsm
    """
    return request.param
def test_read_xlrd_book(read_ext_xlrd, frame):
    """
    Round-trip a frame through an ``xlrd`` Book object.

    The book is read twice: wrapped in an ``ExcelFile`` and passed directly to
    ``read_excel``.
    """
    read_kwargs = {"sheet_name": "SheetA", "index_col": 0}

    with tm.ensure_clean(read_ext_xlrd) as path:
        frame.to_excel(path, "SheetA")
        book = xlrd.open_workbook(path)

        with ExcelFile(book, engine="xlrd") as excel_file:
            from_excel_file = pd.read_excel(excel_file, **read_kwargs)
            tm.assert_frame_equal(frame, from_excel_file)

        from_book = pd.read_excel(book, engine="xlrd", **read_kwargs)
        tm.assert_frame_equal(frame, from_book)
def test_excel_file_warning_with_xlsx_file(datapath):
    """
    Opening an .xlsx file without naming an engine (GH 29375).

    With openpyxl installed no warning may be emitted; without it a
    FutureWarning about the xlrd engine is expected.
    """
    path = datapath("io", "data", "excel", "test1.xlsx")
    openpyxl_module = import_optional_dependency("openpyxl", errors="ignore")

    if openpyxl_module is not None:
        with tm.assert_produces_warning(None):
            pd.read_excel(path, "Sheet1", engine=None)
        return

    with tm.assert_produces_warning(
        FutureWarning,
        raise_on_extra_warnings=False,
        match="The xlrd engine is no longer maintained",
    ):
        ExcelFile(path, engine=None)
def test_read_excel_warning_with_xlsx_file(datapath):
    """
    ``read_excel`` on an .xlsx file without naming an engine (GH 29375).

    With openpyxl installed no warning may be emitted.  Without it, xlrd
    version 2 or newer must raise ValueError and older versions must emit the
    xlrd FutureWarning.
    """
    path = datapath("io", "data", "excel", "test1.xlsx")
    openpyxl_module = import_optional_dependency("openpyxl", errors="ignore")

    if openpyxl_module is not None:
        with tm.assert_produces_warning(None):
            pd.read_excel(path, "Sheet1", engine=None)
        return

    if xlrd_version >= Version("2"):
        with pytest.raises(
            ValueError,
            match="Your version of xlrd is ",
        ):
            pd.read_excel(path, "Sheet1", engine=None)
        return

    with tm.assert_produces_warning(
        FutureWarning,
        raise_on_extra_warnings=False,
        match="The xlrd engine is no longer maintained",
    ):
        pd.read_excel(path, "Sheet1", engine=None)
@pytest.mark.parametrize(
    "file_header",
    [
        b"\x09\x00\x04\x00\x07\x00\x10\x00",
        b"\x09\x02\x06\x00\x00\x00\x10\x00",
        b"\x09\x04\x06\x00\x00\x00\x10\x00",
        b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1",
    ],
)
def test_read_old_xls_files(file_header):
    """Each of these 8-byte headers must be detected as the "xls" format (GH 41226)."""
    stream = io.BytesIO(file_header)
    detected = inspect_excel_format(stream)
    assert detected == "xls"

View File

@@ -0,0 +1,84 @@
import re
import warnings
import pytest
from pandas import DataFrame
import pandas._testing as tm
from pandas.io.excel import ExcelWriter
xlsxwriter = pytest.importorskip("xlsxwriter")
# Module-level parametrization: supplies the ``ext`` argument taken by the
# tests below, with ".xlsx" as its only value.
pytestmark = pytest.mark.parametrize("ext", [".xlsx"])
def test_column_format(ext):
    """
    Test that column formats are applied to cells.

    Test for issue #9167.  Applicable to xlsxwriter only: the format is added
    through the writer's workbook object and the file is read back with
    openpyxl to check the number format of a cell in column B.
    """
    with warnings.catch_warnings():
        # Ignore the openpyxl lxml warning.
        warnings.simplefilter("ignore")
        openpyxl = pytest.importorskip("openpyxl")

    with tm.ensure_clean(ext) as path:
        frame = DataFrame({"A": [123456, 123456], "B": [123456, 123456]})

        # Name the engine explicitly (as the other tests in this module do):
        # the workbook calls below are only valid for xlsxwriter.
        with ExcelWriter(path, engine="xlsxwriter") as writer:
            frame.to_excel(writer)

            # Add a number format to col B and ensure it is applied to cells.
            num_format = "#,##0"
            write_workbook = writer.book
            write_worksheet = write_workbook.worksheets()[0]
            col_format = write_workbook.add_format({"num_format": num_format})
            write_worksheet.set_column("B:B", None, col_format)

        read_workbook = openpyxl.load_workbook(path)
        try:
            read_worksheet = read_workbook["Sheet1"]
        except TypeError:
            # compat
            read_worksheet = read_workbook.get_sheet_by_name(name="Sheet1")

        # Get the number format from the cell.
        try:
            cell = read_worksheet["B2"]
        except TypeError:
            # compat
            cell = read_worksheet.cell("B2")

        try:
            read_num_format = cell.number_format
        except AttributeError:
            read_num_format = cell.style.number_format._format_code

        assert read_num_format == num_format
def test_write_append_mode_raises(ext):
    """Opening an xlsxwriter ExcelWriter in append mode raises ValueError."""
    expected_message = "Append mode is not supported with xlsxwriter!"

    with tm.ensure_clean(ext) as path, pytest.raises(
        ValueError, match=expected_message
    ):
        ExcelWriter(path, engine="xlsxwriter", mode="a")
@pytest.mark.parametrize("nan_inf_to_errors", [True, False])
def test_kwargs(ext, nan_inf_to_errors):
    """
    Extra keyword arguments still reach the xlsxwriter workbook (GH 42286).

    Passing them directly must emit the "**kwargs is deprecated"
    FutureWarning.
    """
    workbook_options = {"nan_inf_to_errors": nan_inf_to_errors}
    warning_pattern = re.escape("Use of **kwargs is deprecated")

    with tm.ensure_clean(ext) as path:
        with tm.assert_produces_warning(FutureWarning, match=warning_pattern):
            with ExcelWriter(
                path, engine="xlsxwriter", options=workbook_options
            ) as writer:
                assert writer.book.nan_inf_to_errors == nan_inf_to_errors
@pytest.mark.parametrize("nan_inf_to_errors", [True, False])
def test_engine_kwargs(ext, nan_inf_to_errors):
    """``engine_kwargs`` are forwarded to the xlsxwriter workbook (GH 42286)."""
    workbook_options = {"nan_inf_to_errors": nan_inf_to_errors}

    with tm.ensure_clean(ext) as path:
        with ExcelWriter(
            path,
            engine="xlsxwriter",
            engine_kwargs={"options": workbook_options},
        ) as writer:
            assert writer.book.nan_inf_to_errors == nan_inf_to_errors

View File

@@ -0,0 +1,127 @@
import re
import numpy as np
import pytest
from pandas import (
DataFrame,
MultiIndex,
options,
)
import pandas._testing as tm
from pandas.io.excel import (
ExcelWriter,
_XlwtWriter,
)
xlwt = pytest.importorskip("xlwt")
# Module-level parametrization: supplies the ``ext`` argument taken by the
# tests below, with ".xls" as its only value.
pytestmark = pytest.mark.parametrize("ext,", [".xls"])
def test_excel_raise_error_on_multiindex_columns_and_no_index(ext):
    """Writing MultiIndex columns with ``index=False`` raises NotImplementedError."""
    # MultiIndex as columns is not yet implemented 9794
    column_tuples = [("site", ""), ("2014", "height"), ("2014", "weight")]
    frame = DataFrame(
        np.random.randn(10, 3), columns=MultiIndex.from_tuples(column_tuples)
    )

    msg = (
        "Writing to Excel with MultiIndex columns and no index "
        "\\('index'=False\\) is not yet implemented."
    )
    with pytest.raises(NotImplementedError, match=msg), tm.ensure_clean(ext) as path:
        frame.to_excel(path, index=False)
def test_excel_multiindex_columns_and_index_true(ext):
    """Writing MultiIndex columns together with the index must not raise."""
    column_tuples = [("site", ""), ("2014", "height"), ("2014", "weight")]
    frame = DataFrame(
        np.random.randn(10, 3), columns=MultiIndex.from_tuples(column_tuples)
    )
    with tm.ensure_clean(ext) as path:
        frame.to_excel(path, index=True)
def test_excel_multiindex_index(ext):
    """A frame with a MultiIndex row index can be written with ``index=False``."""
    # MultiIndex as index works so assert no error #9794
    index_tuples = [("site", ""), ("2014", "height"), ("2014", "weight")]
    frame = DataFrame(
        np.random.randn(3, 10), index=MultiIndex.from_tuples(index_tuples)
    )
    with tm.ensure_clean(ext) as path:
        frame.to_excel(path, index=False)
def test_to_excel_styleconverter(ext):
    """``_XlwtWriter._convert_to_style`` maps a style dict onto xlwt constants."""
    border_sides = ("top", "right", "bottom", "left")
    header_style = {
        "font": {"bold": True},
        "borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"},
        "alignment": {"horizontal": "center", "vertical": "top"},
    }

    converted = _XlwtWriter._convert_to_style(header_style)

    assert converted.font.bold
    for side in border_sides:
        assert xlwt.Borders.THIN == getattr(converted.borders, side)
    assert xlwt.Alignment.HORZ_CENTER == converted.alignment.horz
    assert xlwt.Alignment.VERT_TOP == converted.alignment.vert
def test_write_append_mode_raises(ext):
    """Opening an xlwt ExcelWriter in append mode raises ValueError."""
    expected_message = "Append mode is not supported with xlwt!"

    with tm.ensure_clean(ext) as path, pytest.raises(
        ValueError, match=expected_message
    ):
        ExcelWriter(path, engine="xlwt", mode="a")
def test_to_excel_xlwt_warning(ext):
    """Writing an .xls file emits the xlwt FutureWarning (GH 26552)."""
    frame = DataFrame(np.random.randn(3, 10))
    warning_pattern = "As the xlwt package is no longer maintained"

    with tm.ensure_clean(ext) as path:
        with tm.assert_produces_warning(FutureWarning, match=warning_pattern):
            frame.to_excel(path)
def test_option_xls_writer_deprecated(ext):
    """Setting the ``io.excel.xls.writer`` option emits the xlwt FutureWarning."""
    # GH 26552
    warning_pattern = "As the xlwt package is no longer maintained"
    expect_warning = tm.assert_produces_warning(
        FutureWarning,
        match=warning_pattern,
        check_stacklevel=False,
    )
    with expect_warning:
        options.io.excel.xls.writer = "xlwt"
@pytest.mark.parametrize("style_compression", [0, 2])
def test_kwargs(ext, style_compression):
    """
    Extra keyword arguments still reach the xlwt workbook (GH 42286).

    Passing them directly must emit the "**kwargs is deprecated"
    FutureWarning.
    """
    warning_pattern = re.escape("Use of **kwargs is deprecated")

    with tm.ensure_clean(ext) as path:
        with tm.assert_produces_warning(FutureWarning, match=warning_pattern):
            with ExcelWriter(
                path, engine="xlwt", style_compression=style_compression
            ) as writer:
                workbook_styles = writer.book._Workbook__styles
                assert workbook_styles.style_compression == style_compression
                # xlwt won't allow us to close without writing something
                DataFrame().to_excel(writer)
@pytest.mark.parametrize("style_compression", [0, 2])
def test_engine_kwargs(ext, style_compression):
    """``engine_kwargs`` are forwarded to the xlwt workbook (GH 42286)."""
    with tm.ensure_clean(ext) as path:
        with ExcelWriter(
            path,
            engine="xlwt",
            engine_kwargs={"style_compression": style_compression},
        ) as writer:
            workbook_styles = writer.book._Workbook__styles
            assert workbook_styles.style_compression == style_compression
            # xlwt won't allow us to close without writing something
            DataFrame().to_excel(writer)

View File

@@ -0,0 +1,307 @@
import numpy as np
import pytest
from pandas import DataFrame
pytest.importorskip("jinja2")
def bar_grad(a=None, b=None, c=None, d=None):
    """
    Build the expected CSS property list for one bar cell.

    Used in multiple tests to simplify formatting of expected result.  Always
    contains the width entry; a "background" linear-gradient entry is added
    unless all four colour stops are None.  Falsy stops are left out of the
    gradient.
    """
    stops = (a, b, c, d)
    css = [("width", "10em")]
    if not any(stop is not None for stop in stops):
        return css
    joined = ",".join(stop for stop in stops if stop)
    css.append(("background", f"linear-gradient(90deg,{joined})"))
    return css
def no_bar():
    """Expected CSS for a cell without a bar: ``bar_grad`` called with no stops."""
    return bar_grad()
def bar_to(x, color="#d65f5f"):
    """
    Expected CSS with two gradient stops, both placed at ``x`` percent.

    The first stop uses ``color`` and the second is transparent; ``x`` is
    formatted with one decimal place.
    """
    position = f"{x:.1f}%"
    return bar_grad(f" {color} {position}", f" transparent {position}")
def bar_from_to(x, y, color="#d65f5f"):
    """
    Expected CSS with four gradient stops: transparent and ``color`` at ``x``
    percent, then ``color`` and transparent at ``y`` percent.

    Both positions are formatted with one decimal place.
    """
    start = f"{x:.1f}%"
    end = f"{y:.1f}%"
    stops = (
        f" transparent {start}",
        f" {color} {start}",
        f" {color} {end}",
        f" transparent {end}",
    )
    return bar_grad(*stops)
@pytest.fixture
def df_pos():
    """Single-column DataFrame holding the positive values 1, 2 and 3."""
    return DataFrame([[1], [2], [3]])
@pytest.fixture
def df_neg():
    """Single-column DataFrame holding the negative values -1, -2 and -3."""
    return DataFrame([[-1], [-2], [-3]])
@pytest.fixture
def df_mix():
    """Single-column DataFrame mixing signs: -3, 1 and 2."""
    return DataFrame([[-3], [1], [2]])
@pytest.mark.parametrize(
    "align, exp",
    [
        ("left", [no_bar(), bar_to(50), bar_to(100)]),
        ("right", [bar_to(100), bar_from_to(50, 100), no_bar()]),
        ("mid", [bar_to(33.33), bar_to(66.66), bar_to(100)]),
        ("zero", [bar_from_to(50, 66.7), bar_from_to(50, 83.3), bar_from_to(50, 100)]),
        ("mean", [bar_to(50), no_bar(), bar_from_to(50, 100)]),
        (2.0, [bar_to(50), no_bar(), bar_from_to(50, 100)]),
        (np.median, [bar_to(50), no_bar(), bar_from_to(50, 100)]),
    ],
)
def test_align_positive_cases(df_pos, align, exp):
    """Test different align cases for all positive values."""
    expected = {(row, 0): exp[row] for row in range(3)}
    computed = df_pos.style.bar(align=align)._compute()
    assert computed.ctx == expected
@pytest.mark.parametrize(
    "align, exp",
    [
        ("left", [bar_to(100), bar_to(50), no_bar()]),
        ("right", [no_bar(), bar_from_to(50, 100), bar_to(100)]),
        ("mid", [bar_from_to(66.66, 100), bar_from_to(33.33, 100), bar_to(100)]),
        ("zero", [bar_from_to(33.33, 50), bar_from_to(16.66, 50), bar_to(50)]),
        ("mean", [bar_from_to(50, 100), no_bar(), bar_to(50)]),
        (-2.0, [bar_from_to(50, 100), no_bar(), bar_to(50)]),
        (np.median, [bar_from_to(50, 100), no_bar(), bar_to(50)]),
    ],
)
def test_align_negative_cases(df_neg, align, exp):
    """Test different align cases for all negative values."""
    expected = {(row, 0): exp[row] for row in range(3)}
    computed = df_neg.style.bar(align=align)._compute()
    assert computed.ctx == expected
@pytest.mark.parametrize(
    "align, exp",
    [
        ("left", [no_bar(), bar_to(80), bar_to(100)]),
        ("right", [bar_to(100), bar_from_to(80, 100), no_bar()]),
        ("mid", [bar_to(60), bar_from_to(60, 80), bar_from_to(60, 100)]),
        ("zero", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
        ("mean", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
        (-0.0, [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
        (np.nanmedian, [bar_to(50), no_bar(), bar_from_to(50, 62.5)]),
    ],
)
@pytest.mark.parametrize("nans", [True, False])
def test_align_mixed_cases(df_mix, align, exp, nans):
    """
    Test different align cases for mixed positive and negative values.

    With ``nans`` an all-NaN row is appended; it must get no bar and must not
    change the bars of the other rows.
    """
    expected = {(row, 0): exp[row] for row in range(3)}
    if nans:
        df_mix.loc[3, :] = np.nan
        expected[(3, 0)] = no_bar()
    computed = df_mix.style.bar(align=align)._compute()
    assert computed.ctx == expected
@pytest.mark.parametrize(
    "align, exp",
    [
        (
            "left",
            {
                "index": [[no_bar(), no_bar()], [bar_to(100), bar_to(100)]],
                "columns": [[no_bar(), bar_to(100)], [no_bar(), bar_to(100)]],
                "none": [[no_bar(), bar_to(33.33)], [bar_to(66.66), bar_to(100)]],
            },
        ),
        (
            "mid",
            {
                "index": [[bar_to(33.33), bar_to(50)], [bar_to(100), bar_to(100)]],
                "columns": [[bar_to(50), bar_to(100)], [bar_to(75), bar_to(100)]],
                "none": [[bar_to(25), bar_to(50)], [bar_to(75), bar_to(100)]],
            },
        ),
        (
            "zero",
            {
                "index": [
                    [bar_from_to(50, 66.66), bar_from_to(50, 75)],
                    [bar_from_to(50, 100), bar_from_to(50, 100)],
                ],
                "columns": [
                    [bar_from_to(50, 75), bar_from_to(50, 100)],
                    [bar_from_to(50, 87.5), bar_from_to(50, 100)],
                ],
                "none": [
                    [bar_from_to(50, 62.5), bar_from_to(50, 75)],
                    [bar_from_to(50, 87.5), bar_from_to(50, 100)],
                ],
            },
        ),
        (
            2,
            {
                "index": [
                    [bar_to(50), no_bar()],
                    [bar_from_to(50, 100), bar_from_to(50, 100)],
                ],
                "columns": [
                    [bar_to(50), no_bar()],
                    [bar_from_to(50, 75), bar_from_to(50, 100)],
                ],
                "none": [
                    [bar_from_to(25, 50), no_bar()],
                    [bar_from_to(50, 75), bar_from_to(50, 100)],
                ],
            },
        ),
    ],
)
@pytest.mark.parametrize("axis", ["index", "columns", "none"])
def test_align_axis(align, exp, axis):
    """
    Test all axis combinations with positive values and different aligns.

    The string "none" stands in for ``axis=None`` in the parametrization.
    """
    data = DataFrame([[1, 2], [3, 4]])
    bar_axis = None if axis == "none" else axis
    computed = data.style.bar(align=align, axis=bar_axis)._compute()

    grid = exp[axis]
    expected = {(row, col): grid[row][col] for row in range(2) for col in range(2)}
    assert computed.ctx == expected
@pytest.mark.parametrize(
    "values, vmin, vmax",
    [
        ("positive", 1.5, 2.5),
        ("negative", -2.5, -1.5),
        ("mixed", -2.5, 1.5),
    ],
)
@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"])  # test min/max separately
@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"])
def test_vmin_vmax_clipping(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align):
    """
    Test that clipping occurs if any vmin > data_values or vmax < data_values.

    The bars computed with ``vmin``/``vmax`` must equal the bars of a frame
    whose values were clipped to that range beforehand.
    """
    if align == "mid":
        # mid acts as left or right in each case; mixed data keeps "mid"
        align = {"positive": "left", "negative": "right"}.get(values, align)

    frames = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}
    df = frames[values]
    if nullify == "vmin":
        vmin = None
    if nullify == "vmax":
        vmax = None

    # a missing bound is replaced by a sentinel far outside the data range
    upper_bound = vmax or 999
    lower_bound = vmin or -999
    clip_df = df.where(df <= upper_bound, other=vmax)
    clip_df = clip_df.where(clip_df >= lower_bound, other=vmin)

    styled = df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"])
    result = styled._compute().ctx
    expected = clip_df.style.bar(align=align, color=["red", "green"])._compute().ctx
    assert result == expected
@pytest.mark.parametrize(
    "values, vmin, vmax",
    [
        ("positive", 0.5, 4.5),
        ("negative", -4.5, -0.5),
        ("mixed", -4.5, 4.5),
    ],
)
@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"])  # test min/max separately
@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"])
def test_vmin_vmax_widening(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align):
    """
    Test that widening occurs if any vmax > data_values or vmin < data_values.

    The bars computed with ``vmin``/``vmax`` must be a subset of the bars of a
    frame that has two extra rows holding those bounds.
    """
    if align == "mid":
        # mid acts as left or right in each case; mixed data keeps "mid"
        align = {"positive": "left", "negative": "right"}.get(values, align)

    frames = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}
    df = frames[values]
    if nullify == "vmin":
        vmin = None
    if nullify == "vmax":
        vmax = None

    expand_df = df.copy()
    expand_df.loc[3, :] = vmin
    expand_df.loc[4, :] = vmax

    styled = df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"])
    result = styled._compute().ctx
    expected = expand_df.style.bar(align=align, color=["red", "green"])._compute().ctx
    assert result.items() <= expected.items()
def test_numerics():
    """Test data is pre-selected for numeric values: string cells get no entry."""
    frame = DataFrame([[1, "a"], [2, "b"]])
    ctx = frame.style.bar()._compute().ctx
    for string_cell in ((0, 1), (1, 1)):
        assert string_cell not in ctx
@pytest.mark.parametrize(
    "align, exp",
    [
        ("left", [no_bar(), bar_to(100, "green")]),
        ("right", [bar_to(100, "red"), no_bar()]),
        ("mid", [bar_to(25, "red"), bar_from_to(25, 100, "green")]),
        ("zero", [bar_from_to(33.33, 50, "red"), bar_from_to(50, 100, "green")]),
    ],
)
def test_colors_mixed(align, exp):
    """A two-colour list draws the negative value in red and the positive in green."""
    frame = DataFrame([[-1], [3]])
    expected = {(0, 0): exp[0], (1, 0): exp[1]}
    computed = frame.style.bar(align=align, color=["red", "green"])._compute()
    assert computed.ctx == expected
def test_bar_align_height():
    """
    Test when keyword height is used.

    The background must then contain 'no-repeat center' and a
    'background-size' property must be present.
    """
    frame = DataFrame([[1], [2]])
    computed = frame.style.bar(align="left", height=50)._compute()

    background = (
        "linear-gradient(90deg, #d65f5f 100.0%, transparent 100.0%) no-repeat center"
    )
    width = ("width", "10em")
    expected = {
        (0, 0): [width],
        (1, 0): [
            width,
            ("background", background),
            ("background-size", "100% 50.0%"),
        ],
    }
    assert computed.ctx == expected
def test_bar_value_error_raises():
    """Invalid ``align``, ``width`` and ``height`` arguments raise ValueError."""
    frame = DataFrame({"A": [-100, -60, -30, -20]})

    # (expected message pattern, keyword arguments passed to Styler.bar)
    invalid_calls = [
        (
            "`align` should be in {'left', 'right', 'mid', 'mean', 'zero'} or",
            {"align": "poorly", "color": ["#d65f5f", "#5fba7d"]},
        ),
        (r"`width` must be a value in \[0, 100\]", {"width": 200}),
        (r"`height` must be a value in \[0, 100\]", {"height": 200}),
    ]
    for msg, bar_kwargs in invalid_calls:
        with pytest.raises(ValueError, match=msg):
            frame.style.bar(**bar_kwargs).to_html()

View File

@@ -0,0 +1,165 @@
"""
This module collects tests for Styler methods which have been deprecated.
"""
import numpy as np
import pytest
jinja2 = pytest.importorskip("jinja2")
from pandas import (
DataFrame,
IndexSlice,
NaT,
Timestamp,
)
import pandas._testing as tm
@pytest.fixture
def df():
    """Two-row DataFrame: column "A" holds 0 and 1, column "B" random normals."""
    return DataFrame({"A": [0, 1], "B": np.random.randn(2)})
@pytest.mark.parametrize("axis", ["index", "columns"])
def test_hide_index_columns(df, axis):
    """``Styler.hide_index`` and ``Styler.hide_columns`` emit a FutureWarning."""
    deprecated_method = getattr(df.style, f"hide_{axis}")
    with tm.assert_produces_warning(FutureWarning):
        deprecated_method()
def test_set_non_numeric_na():
    """``set_na_rep`` warns and applies to object and datetime columns."""
    # GH 21527 28358
    frame = DataFrame(
        {
            "object": [None, np.nan, "foo"],
            "datetime": [None, NaT, Timestamp("20120101")],
        }
    )

    with tm.assert_produces_warning(FutureWarning):
        ctx = frame.style.set_na_rep("NA")._translate(True, True)

    # the first two rows are missing in both data columns (cells 1 and 2)
    for row in (0, 1):
        for cell in (1, 2):
            assert ctx["body"][row][cell]["display_value"] == "NA"
def test_where_with_one_style(df):
    """``Styler.where`` with one style warns and styles only cells passing the test."""
    # GH 17474

    def above_half(x):
        return x > 0.5

    with tm.assert_produces_warning(FutureWarning):
        result = df.style.where(above_half, "foo: bar")._compute().ctx

    expected = {}
    for r, row in enumerate(df.index):
        for c, col in enumerate(df.columns):
            if above_half(df.loc[row, col]):
                expected[(r, c)] = [("foo", "bar")]
    assert result == expected
@pytest.mark.parametrize(
    "slice_",
    [
        IndexSlice[:],
        IndexSlice[:, ["A"]],
        IndexSlice[[1], :],
        IndexSlice[[1], ["A"]],
        IndexSlice[:2, ["A", "B"]],
    ],
)
def test_where_subset(df, slice_):
    """``Styler.where`` with two styles only touches cells inside ``subset``."""
    # GH 17474

    def above_half(x):
        return x > 0.5

    with tm.assert_produces_warning(FutureWarning):
        styled = df.style.where(above_half, "foo: bar", "baz: foo", subset=slice_)
        res = styled._compute().ctx

    selected = df.loc[slice_]
    expected = {}
    for r, row in enumerate(df.index):
        if row not in selected.index:
            continue
        for c, col in enumerate(df.columns):
            if col not in selected.columns:
                continue
            passed = above_half(df.loc[row, col])
            expected[(r, c)] = [("foo", "bar") if passed else ("baz", "foo")]
    assert res == expected
def test_where_subset_compare_with_applymap(df):
    """``Styler.where`` must produce the same context as the equivalent ``applymap``."""
    # GH 17474
    passing_style = "foo: bar"
    failing_style = "baz: foo"

    def above_half(x):
        return x > 0.5

    def pick_style(x):
        if above_half(x):
            return passing_style
        return failing_style

    subsets = (
        IndexSlice[:],
        IndexSlice[:, ["A"]],
        IndexSlice[[1], :],
        IndexSlice[[1], ["A"]],
        IndexSlice[:2, ["A", "B"]],
    )

    for subset in subsets:
        with tm.assert_produces_warning(FutureWarning):
            styled = df.style.where(
                above_half, passing_style, failing_style, subset=subset
            )
            result = styled._compute().ctx
        expected = df.style.applymap(pick_style, subset=subset)._compute().ctx
        assert result == expected
def test_where_kwargs():
    """Extra keyword arguments to ``Styler.where`` are passed on to the condition."""
    frame = DataFrame([[1, 2], [3, 4]])

    def exceeds(x, val):
        return x > val

    with tm.assert_produces_warning(FutureWarning):
        styled = frame.style.where(exceeds, "color:green;", "color:red;", val=2)
        res = styled._compute().ctx

    red = [("color", "red")]
    green = [("color", "green")]
    expected = {(0, 0): red, (0, 1): red, (1, 0): green, (1, 1): green}
    assert res == expected
def test_set_na_rep():
    """
    ``set_na_rep`` warns and sets the representation of missing values.

    A later ``format(..., na_rep=...)`` on a subset takes precedence for that
    subset.
    """
    # GH 21527 28358
    frame = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])

    with tm.assert_produces_warning(FutureWarning):
        ctx = frame.style.set_na_rep("NA")._translate(True, True)
    first_row = ctx["body"][0]
    assert first_row[1]["display_value"] == "NA"
    assert first_row[2]["display_value"] == "NA"

    with tm.assert_produces_warning(FutureWarning):
        styled = frame.style.set_na_rep("NA")
        styled = styled.format(None, na_rep="-", subset=["B"])
        ctx = styled._translate(True, True)
    first_row = ctx["body"][0]
    assert first_row[1]["display_value"] == "NA"
    assert first_row[2]["display_value"] == "-"
def test_precision(df):
    """``set_precision`` warns, returns the same Styler and stores the precision."""
    original = df.style
    with tm.assert_produces_warning(FutureWarning):
        returned = original.set_precision(1)
    assert original is returned
    assert original.precision == 1
def test_render(df):
    """Calling ``Styler.render`` must emit a FutureWarning."""
    with tm.assert_produces_warning(FutureWarning):
        df.style.render()

View File

@@ -0,0 +1,444 @@
import numpy as np
import pytest
from pandas import (
NA,
DataFrame,
IndexSlice,
MultiIndex,
NaT,
Timestamp,
option_context,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
from pandas.io.formats.style_render import _str_escape
@pytest.fixture
def df():
    """2x2 DataFrame with columns "A" (ints) and "B" (negative floats), rows "x", "y"."""
    return DataFrame(
        data=[[0, -0.609], [1, -1.228]],
        columns=["A", "B"],
        index=["x", "y"],
    )
@pytest.fixture
def styler(df):
    """Styler built on the ``df`` fixture with ``uuid_len=0``."""
    return Styler(df, uuid_len=0)
def test_display_format(styler):
    """
    A format string is applied to every data cell of the body.

    Every body cell must expose a "display_value", and with "{:0.1f}" each
    data cell must render in at most three characters once a leading minus
    sign is ignored.
    """
    ctx = styler.format("{:0.1f}")._translate(True, True)
    body = ctx["body"]

    # Flatten rows and cells: ``all([...] for row in body)`` would only test
    # the truthiness of each non-empty list and could never fail.
    assert all("display_value" in cell for row in body for cell in row)
    # row[0] is the index label, so only the cells after it are data cells
    assert all(
        len(cell["display_value"].lstrip("-")) <= 3
        for row in body
        for cell in row[1:]
    )
    assert len(body[0][1]["display_value"].lstrip("-")) <= 3
@pytest.mark.parametrize("index", [True, False])
@pytest.mark.parametrize("columns", [True, False])
def test_display_format_index(styler, index, columns):
    """
    ``format_index`` changes the displayed row and column labels.

    Row labels are formatted with a callable and column labels with a format
    string; when a flag is False the labels must keep their original text.
    """
    exp_index = ["x", "y"]
    if index:
        styler.format_index(lambda v: v.upper(), axis=0)  # test callable
        exp_index = ["X", "Y"]

    exp_columns = ["A", "B"]
    if columns:
        styler.format_index("*{}*", axis=1)  # test string
        exp_columns = ["*A*", "*B*"]

    ctx = styler._translate(True, True)

    for r, row in enumerate(ctx["body"]):
        assert row[0]["display_value"] == exp_index[r]

    # ``ctx["head"]`` is a list of header rows.  Check the cells of the first
    # row, skipping its leading cell; iterating ``ctx["head"][1:]`` would walk
    # over the remaining header rows instead and check nothing here.
    for c, col in enumerate(ctx["head"][0][1:]):
        assert col["display_value"] == exp_columns[c]
def test_format_dict(styler):
    """A dict formatter applies a separate format string to each named column."""
    per_column = {"A": "{:0.1f}", "B": "{0:.2%}"}
    first_row = styler.format(per_column)._translate(True, True)["body"][0]
    assert first_row[1]["display_value"] == "0.0"
    assert first_row[2]["display_value"] == "-60.90%"
def test_format_index_dict(styler):
    """A dict keyed by level number formats the index labels."""
    translated = styler.format_index({0: lambda v: v.upper()})._translate(True, True)
    body = translated["body"]
    labels = [body[position][0]["display_value"] for position in range(2)]
    assert labels == ["X", "Y"]
def test_format_string(styler):
    """A single format string is applied to every data cell."""
    body = styler.format("{:.2f}")._translate(True, True)["body"]
    expected_cells = [
        (0, 1, "0.00"),
        (0, 2, "-0.61"),
        (1, 1, "1.00"),
        (1, 2, "-1.23"),
    ]
    for row, col, text in expected_cells:
        assert body[row][col]["display_value"] == text
def test_format_callable(styler):
    """A callable formatter receives each value and its result is displayed."""

    def sign_label(value):
        return "neg" if value < 0 else "pos"

    body = styler.format(sign_label)._translate(True, True)["body"]
    for row in (0, 1):
        assert body[row][1]["display_value"] == "pos"
        assert body[row][2]["display_value"] == "neg"
def test_format_with_na_rep():
    """``na_rep`` replaces missing values with and without a formatter or subset."""
    # GH 21527 28358
    frame = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])

    # no formatter: only the NA representation changes
    body = frame.style.format(None, na_rep="-")._translate(True, True)["body"]
    assert body[0][1]["display_value"] == "-"
    assert body[0][2]["display_value"] == "-"

    # formatter plus na_rep
    body = frame.style.format("{:.2%}", na_rep="-")._translate(True, True)["body"]
    assert body[0][1]["display_value"] == "-"
    assert body[0][2]["display_value"] == "-"
    assert body[1][1]["display_value"] == "110.00%"
    assert body[1][2]["display_value"] == "120.00%"

    # formatter plus na_rep restricted to column "B"
    styled = frame.style.format("{:.2%}", na_rep="-", subset=["B"])
    body = styled._translate(True, True)["body"]
    assert body[0][2]["display_value"] == "-"
    assert body[1][2]["display_value"] == "120.00%"
def test_format_index_with_na_rep():
    """``format_index`` shows ``na_rep`` for every kind of missing column label."""
    labels = ["A", None, np.nan, NaT, NA]
    frame = DataFrame([[1, 2, 3, 4, 5]], columns=labels)
    translated = frame.style.format_index(None, na_rep="--", axis=1)._translate(
        True, True
    )
    header = translated["head"][0]
    assert header[1]["display_value"] == "A"
    for position in range(2, 6):
        assert header[position]["display_value"] == "--"
def test_format_non_numeric_na():
    """``na_rep`` is used for missing values in object and datetime columns."""
    # GH 21527 28358
    columns = {
        "object": [None, np.nan, "foo"],
        "datetime": [None, NaT, Timestamp("20120101")],
    }
    frame = DataFrame(columns)
    body = frame.style.format(None, na_rep="-")._translate(True, True)["body"]
    # the first two rows of both columns hold missing values
    for row in (0, 1):
        for col in (1, 2):
            assert body[row][col]["display_value"] == "-"
@pytest.mark.parametrize(
    "func, attr, kwargs",
    [
        ("format", "_display_funcs", {}),
        ("format_index", "_display_funcs_index", {"axis": 0}),
        ("format_index", "_display_funcs_columns", {"axis": 1}),
    ],
)
def test_format_clear(styler, func, attr, kwargs):
    """Calling a format method without a formatter removes the stored one."""
    cell = (0, 0)

    def has_custom_formatter():
        # look the mapping up afresh on every check
        return cell in getattr(styler, attr)

    assert not has_custom_formatter()  # using default

    getattr(styler, func)("{:.2f}", **kwargs)
    assert has_custom_formatter()  # formatter is specified

    getattr(styler, func)(**kwargs)
    assert not has_custom_formatter()  # formatter cleared to default
@pytest.mark.parametrize(
    "escape, exp",
    [
        ("html", "&lt;&gt;&amp;&#34;%$#_{}~^\\~ ^ \\ "),
        (
            "latex",
            '<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum '
            "\\textbackslash \\textasciitilde \\space \\textasciicircum \\space "
            "\\textbackslash \\space ",
        ),
    ],
)
def test_format_escape_html(escape, exp):
    """``escape`` is applied to the cell value only, never to the formatter text."""
    chars = '<>&"%$#_{}~^\\~ ^ \\ '
    frame = DataFrame([[chars]])

    # without escaping the raw characters appear unchanged
    unescaped = Styler(frame, uuid_len=0).format("&{0}&", escape=None)
    expected = f'<td id="T__row0_col0" class="data row0 col0" >&{chars}&</td>'
    assert expected in unescaped.to_html()

    # only the value should be escaped before passing to the formatter
    escaped = Styler(frame, uuid_len=0).format("&{0}&", escape=escape)
    expected = f'<td id="T__row0_col0" class="data row0 col0" >&{exp}&</td>'
    assert expected in escaped.to_html()

    # also test format_index()
    header_styler = Styler(DataFrame(columns=[chars]), uuid_len=0)
    header_styler.format_index("&{0}&", escape=None, axis=1)
    header_cell = header_styler._translate(True, True)["head"][0][1]
    assert header_cell["display_value"] == f"&{chars}&"

    header_styler.format_index("&{0}&", escape=escape, axis=1)
    header_cell = header_styler._translate(True, True)["head"][0][1]
    assert header_cell["display_value"] == f"&{exp}&"
def test_format_escape_na_rep():
    """``na_rep`` text is emitted verbatim even when ``escape`` is active."""
    # tests the na_rep is not escaped
    frame = DataFrame([['<>&"', None]])
    styled = Styler(frame, uuid_len=0).format(
        "X&{0}>X", escape="html", na_rep="&"
    )
    value_cell = (
        '<td id="T__row0_col0" class="data row0 col0" >X&&lt;&gt;&amp;&#34;>X</td>'
    )
    na_cell = '<td id="T__row0_col1" class="data row0 col1" >&</td>'
    assert value_cell in styled.to_html()
    assert na_cell in styled.to_html()

    # also test for format_index()
    header_frame = DataFrame(columns=['<>&"', None])
    header_styler = Styler(header_frame, uuid_len=0)
    header_styler.format_index("X&{0}>X", escape="html", na_rep="&", axis=1)
    header = header_styler._translate(True, True)["head"][0]
    assert header[1]["display_value"] == "X&&lt;&gt;&amp;&#34;>X"
    assert header[2]["display_value"] == "&"
def test_format_escape_floats(styler):
    """Numeric formatting is unaffected by ``escape="html"``."""
    # test given formatter for number format is not impacted by escape
    with_formatter = styler.format("{:.1f}", escape="html")
    for fragment in (">0.0<", ">1.0<", ">-1.2<", ">-0.6<"):
        rendered = with_formatter.to_html()
        assert fragment in rendered

    # tests precision of floats is not impacted by escape
    with_precision = styler.format(precision=1, escape="html")
    for fragment in (">0<", ">1<", ">-1.2<", ">-0.6<"):
        rendered = with_precision.to_html()
        assert fragment in rendered
@pytest.mark.parametrize("formatter", [5, True, [2.0]])
@pytest.mark.parametrize("func", ["format", "format_index"])
def test_format_raises(styler, formatter, func):
    """Formatters that are neither str nor callable raise ``TypeError``."""
    with pytest.raises(TypeError, match="expected str or callable"):
        method = getattr(styler, func)
        method(formatter)
@pytest.mark.parametrize(
    "precision, expected",
    [
        (1, ["1.0", "2.0", "3.2", "4.6"]),
        (2, ["1.00", "2.01", "3.21", "4.57"]),
        (3, ["1.000", "2.009", "3.212", "4.566"]),
    ],
)
def test_format_with_precision(precision, expected):
    """``precision`` controls the displayed decimals of data and column labels."""
    # Issue #13257
    values = [1.0, 2.0090, 3.2121, 4.566]
    styled = Styler(DataFrame([values], columns=values))
    styled.format(precision=precision)
    styled.format_index(precision=precision, axis=1)

    translated = styled._translate(True, True)
    data_row = translated["body"][0]
    header_row = translated["head"][0]
    # position 0 is the index/blank cell, so labels and data start at 1
    for position, text in enumerate(expected, start=1):
        assert data_row[position]["display_value"] == text  # format test
        assert header_row[position]["display_value"] == text  # format_index test
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize(
    "level, expected",
    [
        (0, ["X", "X", "_", "_"]),  # level int
        ("zero", ["X", "X", "_", "_"]),  # level name
        (1, ["_", "_", "X", "X"]),  # other level int
        ("one", ["_", "_", "X", "X"]),  # other level name
        ([0, 1], ["X", "X", "X", "X"]),  # both levels
        ([0, "zero"], ["X", "X", "_", "_"]),  # level int and name simultaneous
        ([0, "one"], ["X", "X", "X", "X"]),  # both levels as int and name
        (["one", "zero"], ["X", "X", "X", "X"]),  # both level names, reversed
    ],
)
def test_format_index_level(axis, level, expected):
    """``level`` restricts ``format_index`` to the selected MultiIndex levels."""
    midx = MultiIndex.from_arrays([["_", "_"], ["_", "_"]], names=["zero", "one"])
    frame = DataFrame([[1, 2], [3, 4]])
    setattr(frame, "index" if axis == 0 else "columns", midx)

    styled = frame.style.format_index(lambda v: "X", level=level, axis=axis)
    translated = styled._translate(True, True)

    if axis == 0:  # compare index: level 0 of both rows, then level 1
        cells = [
            translated["body"][row][lvl] for lvl in range(2) for row in range(2)
        ]
    else:  # compare columns: header row 0, then header row 1
        cells = [
            translated["head"][lvl][pos + 1] for lvl in range(2) for pos in range(2)
        ]
    result = [cell["display_value"] for cell in cells]
    assert expected == result
def test_format_subset():
    """``subset`` limits formatting to the selected rows and columns."""
    frame = DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"])
    formatted = "0.1"
    untouched = "1.123400"

    def body_of(styled):
        return styled._translate(True, True)["body"]

    # dict formatter restricted to the first row
    body = body_of(
        frame.style.format(
            {"a": "{:0.1f}", "b": "{0:.2%}"}, subset=IndexSlice[0, :]
        )
    )
    assert body[0][1]["display_value"] == formatted
    assert body[1][1]["display_value"] == untouched
    assert body[0][2]["display_value"] == "12.34%"

    # string formatter restricted to the first row
    body = body_of(frame.style.format("{:0.1f}", subset=IndexSlice[0, :]))
    assert body[0][1]["display_value"] == formatted
    assert body[1][1]["display_value"] == untouched

    # string formatter restricted to column "a"
    body = body_of(frame.style.format("{:0.1f}", subset=IndexSlice["a"]))
    assert body[0][1]["display_value"] == formatted
    assert body[0][2]["display_value"] == "0.123400"

    # a single cell
    body = body_of(frame.style.format("{:0.1f}", subset=IndexSlice[0, "a"]))
    assert body[0][1]["display_value"] == formatted
    assert body[1][1]["display_value"] == untouched

    # list selectors on both axes
    body = body_of(
        frame.style.format("{:0.1f}", subset=IndexSlice[[0, 1], ["a"]])
    )
    assert body[0][1]["display_value"] == formatted
    assert body[1][1]["display_value"] == "1.1"
    assert body[0][2]["display_value"] == "0.123400"
    assert body[1][2]["display_value"] == untouched
@pytest.mark.parametrize("formatter", [None, "{:,.1f}"])
@pytest.mark.parametrize("decimal", [".", "*"])
@pytest.mark.parametrize("precision", [None, 2])
@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)])
def test_format_thousands(formatter, decimal, precision, func, col):
    """The ``thousands`` separator is applied to float, int and complex values."""
    # testing float, int and complex, in that order
    samples = [1000000.123456789, 1000000, 1 + 1000000.123456789j]
    for value in samples:
        styled = DataFrame([[value]], index=[value]).style
        translated = getattr(styled, func)(
            thousands="_", formatter=formatter, decimal=decimal, precision=precision
        )._translate(True, True)
        assert "1_000_000" in translated["body"][0][col]["display_value"]
@pytest.mark.parametrize("formatter", [None, "{:,.4f}"])
@pytest.mark.parametrize("thousands", [None, ",", "*"])
@pytest.mark.parametrize("precision", [None, 4])
@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)])
def test_format_decimal(formatter, thousands, precision, func, col):
    """The ``decimal`` separator is applied to float and complex values."""
    # testing float and complex, in that order
    samples = [1000000.123456789, 1 + 1000000.123456789j]
    for value in samples:
        styled = DataFrame([[value]], index=[value]).style
        translated = getattr(styled, func)(
            decimal="_", formatter=formatter, thousands=thousands, precision=precision
        )._translate(True, True)
        assert "000_123" in translated["body"][0][col]["display_value"]
def test_str_escape_error():
    """``_str_escape`` rejects unknown escape modes for string input only."""
    msg = "`escape` only permitted in {'html', 'latex'}, got "
    for bad_mode in ("bad_escape", []):
        with pytest.raises(ValueError, match=msg):
            _str_escape("text", bad_mode)

    # OK since dtype is float
    _str_escape(2.00, "bad_escape")
def test_format_options():
    """Each ``styler.format.*`` option changes the default cell rendering."""
    frame = DataFrame({"int": [2000, 1], "float": [1.009, None], "str": ["&<", "&~"]})

    def render_body():
        # a fresh Styler each time so the currently active options are read
        return frame.style._translate(True, True)["body"]

    default_body = render_body()

    # test option: na_rep
    assert default_body[1][2]["display_value"] == "nan"
    with option_context("styler.format.na_rep", "MISSING"):
        body = render_body()
        assert body[1][2]["display_value"] == "MISSING"

    # test option: decimal and precision
    assert default_body[0][2]["display_value"] == "1.009000"
    with option_context("styler.format.decimal", "_"):
        body = render_body()
        assert body[0][2]["display_value"] == "1_009000"
    with option_context("styler.format.precision", 2):
        body = render_body()
        assert body[0][2]["display_value"] == "1.01"

    # test option: thousands
    assert default_body[0][1]["display_value"] == "2000"
    with option_context("styler.format.thousands", "_"):
        body = render_body()
        assert body[0][1]["display_value"] == "2_000"

    # test option: escape
    assert default_body[0][3]["display_value"] == "&<"
    assert default_body[1][3]["display_value"] == "&~"
    with option_context("styler.format.escape", "html"):
        body = render_body()
        assert body[0][3]["display_value"] == "&amp;&lt;"
    with option_context("styler.format.escape", "latex"):
        body = render_body()
        assert body[1][3]["display_value"] == "\\&\\textasciitilde "

    # test option: formatter
    with option_context("styler.format.formatter", {"int": "{:,.2f}"}):
        body = render_body()
        assert body[0][1]["display_value"] == "2,000.00"
def test_precision_zero(df):
    """``precision=0`` displays the floats of column ``B`` without decimals."""
    body = Styler(df, precision=0)._translate(True, True)["body"]
    for row in (0, 1):
        assert body[row][2]["display_value"] == "-1"
@pytest.mark.parametrize(
    "formatter, exp",
    [
        (lambda x: f"{x:.3f}", "9.000"),
        ("{:.2f}", "9.00"),
        ({0: "{:.1f}"}, "9.0"),
        (None, "9"),
    ],
)
def test_formatter_options_validator(formatter, exp):
    """The formatter option accepts callables, strings, dicts and ``None``."""
    frame = DataFrame([[9]])
    with option_context("styler.format.formatter", formatter):
        latex = frame.style.to_latex()
        assert f" {exp} " in latex
def test_formatter_options_raises():
    """A list is not an accepted value for the formatter option."""
    msg = "Value must be an instance of"
    bad_value = ["bad", "type"]
    with pytest.raises(ValueError, match=msg), option_context(
        "styler.format.formatter", bad_value
    ):
        DataFrame().style.to_latex()
def test_1level_multiindex():
    """Labels of a one-level MultiIndex are displayed and visible."""
    # GH 43383
    midx = MultiIndex.from_product([[1, 2]], names=[""])
    frame = DataFrame(-1, index=midx, columns=[0, 1])
    body = frame.style._translate(True, True)["body"]
    for row, label in ((0, "1"), (1, "2")):
        index_cell = body[row][0]
        assert index_cell["display_value"] == label
        assert index_cell["is_visible"] is True
def test_boolean_format():
    """Boolean cells keep their ``True``/``False`` objects as display values."""
    # gh 46384: booleans do not collapse to integer representation on display
    frame = DataFrame([[True, False]])
    first_row = frame.style._translate(True, True)["body"][0]
    assert first_row[1]["display_value"] is True
    assert first_row[2]["display_value"] is False

View File

@@ -0,0 +1,219 @@
import numpy as np
import pytest
from pandas import (
NA,
DataFrame,
IndexSlice,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
@pytest.fixture(params=[(None, "float64"), (NA, "Int64")])
def df(request):
    """Three-row frame built once per (missing value, dtype) parameter pair."""
    # GH 45804
    missing, dtype = request.param[0], request.param[1]
    columns = {"A": [0, np.nan, 10], "B": [1, missing, 2]}
    return DataFrame(columns, dtype=dtype)
@pytest.fixture
def styler(df):
    """Styler wrapping the parametrized ``df`` fixture, built with ``uuid_len=0``."""
    styled = Styler(df, uuid_len=0)
    return styled
def test_highlight_null(styler):
    """``highlight_null`` marks the missing cells of row 1 with a red background."""
    computed = styler.highlight_null()._compute()
    expected = {cell: [("background-color", "red")] for cell in [(1, 0), (1, 1)]}
    assert computed.ctx == expected
def test_highlight_null_subset(styler):
    """Separate ``highlight_null`` calls can colour different column subsets."""
    # GH 31345
    styled = styler.highlight_null(null_color="red", subset=["A"])
    styled = styled.highlight_null(null_color="green", subset=["B"])
    result = styled._compute().ctx
    expected = {
        (1, 0): [("background-color", "red")],
        (1, 1): [("background-color", "green")],
    }
    assert result == expected
@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"])
def test_highlight_minmax_basic(df, f):
    """Row-wise min/max highlighting; the frame is negated for the min case."""
    red = ("background-color", "red")
    expected = {
        (0, 1): [red],
        # ignores NaN row,
        (2, 0): [red],
    }
    frame = -df if f == "highlight_min" else df
    highlight = getattr(frame.style, f)
    result = highlight(axis=1, color="red")._compute().ctx
    assert result == expected
@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"])
@pytest.mark.parametrize(
    "kwargs",
    [
        {"axis": None, "color": "red"},  # test axis
        {"axis": 0, "subset": ["A"], "color": "red"},  # test subset and ignores NaN
        {"axis": None, "props": "background-color: red"},  # test props
    ],
)
def test_highlight_minmax_ext(df, f, kwargs):
    """Min/max highlighting with ``axis``, ``subset`` and ``props`` arguments."""
    frame = -df if f == "highlight_min" else df
    highlight = getattr(frame.style, f)
    result = highlight(**kwargs)._compute().ctx
    assert result == {(2, 0): [("background-color", "red")]}
@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"])
@pytest.mark.parametrize("axis", [None, 0, 1])
def test_highlight_minmax_nulls(f, axis):
    """Min/max highlighting on columns that contain NA, None and NaN."""
    # GH 42750
    expected = {
        (1, 0): [("background-color", "yellow")],
        (1, 1): [("background-color", "yellow")],
    }
    if axis == 1:
        expected[(2, 1)] = [("background-color", "yellow")]

    # mirror the values so the same cells are extreme for both min and max
    sign = 1 if f == "highlight_max" else -1
    frame = DataFrame({"a": [NA, sign, None], "b": [np.nan, sign, -sign]})

    result = getattr(frame.style, f)(axis=axis)._compute().ctx
    assert result == expected
@pytest.mark.parametrize(
    "kwargs",
    [
        {"left": 0, "right": 1},  # test basic range
        {"left": 0, "right": 1, "props": "background-color: yellow"},  # test props
        {"left": -100, "right": 100, "subset": IndexSlice[[0, 1], :]},  # test subset
        {"left": 0, "subset": IndexSlice[[0, 1], :]},  # test no right
        {"right": 1},  # test no left
        {"left": [0, 0, 11], "axis": 0},  # test left as sequence
        {"left": DataFrame({"A": [0, 0, 11], "B": [1, 1, 11]}), "axis": None},  # axis
        {"left": 0, "right": [0, 1], "axis": 1},  # test sequence right
    ],
)
def test_highlight_between(styler, kwargs):
    """Every argument combination highlights exactly the two cells of row 0."""
    computed = styler.highlight_between(**kwargs)._compute()
    expected = {
        cell: [("background-color", "yellow")] for cell in [(0, 0), (0, 1)]
    }
    assert computed.ctx == expected
@pytest.mark.parametrize(
    "arg, map, axis",
    [
        ("left", [1, 2], 0),  # 0 axis has 3 elements not 2
        ("left", [1, 2, 3], 1),  # 1 axis has 2 elements not 3
        ("left", np.array([[1, 2], [1, 2]]), None),  # df is (3,2) not (2,2)
        ("right", [1, 2], 0),  # same tests as above for 'right' not 'left'
        ("right", [1, 2, 3], 1),  # ..
        ("right", np.array([[1, 2], [1, 2]]), None),  # ..
    ],
)
def test_highlight_between_raises(arg, styler, map, axis):
    """A bound whose shape does not fit the data raises ``ValueError``."""
    call_kwargs = {arg: map, "axis": axis}
    msg = f"supplied '{arg}' is not correct shape"
    with pytest.raises(ValueError, match=msg):
        styler.highlight_between(**call_kwargs)._compute()
def test_highlight_between_raises2(styler):
    """Unsupported ``inclusive`` values raise ``ValueError``."""
    msg = "values can be 'both', 'left', 'right', or 'neither'"
    for bad_inclusive in ("badstring", 1):
        with pytest.raises(ValueError, match=msg):
            styler.highlight_between(inclusive=bad_inclusive)._compute()
@pytest.mark.parametrize(
    "inclusive, expected",
    [
        (
            "both",
            {
                (0, 0): [("background-color", "yellow")],
                (0, 1): [("background-color", "yellow")],
            },
        ),
        ("neither", {}),
        ("left", {(0, 0): [("background-color", "yellow")]}),
        ("right", {(0, 1): [("background-color", "yellow")]}),
    ],
)
def test_highlight_between_inclusive(styler, inclusive, expected):
    """``inclusive`` decides whether the boundary values 0 and 1 are highlighted."""
    computed = styler.highlight_between(
        left=0, right=1, subset=IndexSlice[[0, 1], :], inclusive=inclusive
    )._compute()
    assert computed.ctx == expected
@pytest.mark.parametrize(
    "kwargs",
    [
        {"q_left": 0.5, "q_right": 1, "axis": 0},  # base case
        {"q_left": 0.5, "q_right": 1, "axis": None},  # test axis
        {"q_left": 0, "q_right": 1, "subset": IndexSlice[2, :]},  # test subset
        {"q_left": 0.5, "axis": 0},  # test no high
        {"q_right": 1, "subset": IndexSlice[2, :], "axis": 1},  # test no low
        {"q_left": 0.5, "axis": 0, "props": "background-color: yellow"},  # tst prop
    ],
)
def test_highlight_quantile(styler, kwargs):
    """Every argument combination highlights exactly the two cells of row 2."""
    computed = styler.highlight_quantile(**kwargs)._compute()
    expected = {
        cell: [("background-color", "yellow")] for cell in [(2, 0), (2, 1)]
    }
    assert computed.ctx == expected
@pytest.mark.skipif(np.__version__[:4] in ["1.16", "1.17"], reason="Numpy Issue #14831")
@pytest.mark.parametrize(
    "f,kwargs",
    [
        ("highlight_min", {"axis": 1, "subset": IndexSlice[1, :]}),
        ("highlight_max", {"axis": 0, "subset": [0]}),
        ("highlight_quantile", {"axis": None, "q_left": 0.6, "q_right": 0.8}),
        ("highlight_between", {"subset": [0]}),
    ],
)
@pytest.mark.parametrize(
    "df",
    [
        DataFrame([[0, 10], [20, 30]], dtype=int),
        DataFrame([[0, 10], [20, 30]], dtype=float),
        DataFrame([[0, 10], [20, 30]], dtype="datetime64[ns]"),
        DataFrame([[0, 10], [20, 30]], dtype=str),
        DataFrame([[0, 10], [20, 30]], dtype="timedelta64[ns]"),
    ],
)
def test_all_highlight_dtypes(f, kwargs, df):
    """Each highlight method selects cell (1, 0) for all supported dtypes.

    Parameters
    ----------
    f : str
        Name of the Styler highlight method to call.
    kwargs : dict
        Keyword arguments for that method.  The dict object comes from the
        parametrize list and is shared by every ``df`` case, so it is never
        modified in place.
    df : DataFrame
        2x2 frame of one dtype.
    """
    if f == "highlight_quantile" and isinstance(df.iloc[0, 0], str):
        return None  # quantile incompatible with str

    call_kwargs = dict(kwargs)
    if f == "highlight_between":
        # set the range low for testing; written to a per-call copy so one
        # dtype's value does not leak into the next parametrized run
        call_kwargs["left"] = df.iloc[1, 0]

    expected = {(1, 0): [("background-color", "yellow")]}
    result = getattr(df.style, f)(**call_kwargs)._compute().ctx
    assert result == expected

View File

@@ -0,0 +1,818 @@
from textwrap import dedent
import numpy as np
import pytest
from pandas import (
DataFrame,
MultiIndex,
option_context,
)
jinja2 = pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
# Jinja2 loader/environment pointing at the template directory shipped inside
# the pandas package; the template fixtures below fetch templates through it.
loader = jinja2.PackageLoader("pandas", "io/formats/templates")
env = jinja2.Environment(loader=loader, trim_blocks=True)
@pytest.fixture
def styler():
    """Styler over a two-row, single-column (``A``) frame indexed ``a``/``b``."""
    frame = DataFrame([[2.61], [2.69]], index=["a", "b"], columns=["A"])
    return Styler(frame)
@pytest.fixture
def styler_mi():
    """Styler over a 4x4 frame with the same two-level MultiIndex on both axes."""
    midx = MultiIndex.from_product([["a", "b"], ["c", "d"]])
    values = np.arange(16).reshape(4, 4)
    frame = DataFrame(values, index=midx, columns=midx)
    return Styler(frame)
@pytest.fixture
def tpl_style():
    """The ``html_style.tpl`` template loaded from the module-level ``env``."""
    template = env.get_template("html_style.tpl")
    return template
@pytest.fixture
def tpl_table():
    """The ``html_table.tpl`` template loaded from the module-level ``env``."""
    template = env.get_template("html_table.tpl")
    return template
def test_html_template_extends_options():
    """``html.tpl`` must include both the style and the table sub-templates."""
    # make sure if templates are edited tests are updated as are setup fixtures
    # to understand the dependency
    #
    # The template source is read through the module-level Jinja2 loader rather
    # than ``open("pandas/io/formats/templates/html.tpl")``: that path was
    # relative to the current working directory, so the test only passed when
    # pytest was started from the repository root.
    result, _filename, _uptodate = loader.get_source(env, "html.tpl")
    assert "{% include html_style_tpl %}" in result
    assert "{% include html_table_tpl %}" in result
def test_exclude_styles(styler):
    """``exclude_styles=True`` with ``doctype_html=True`` is compared to a fixed document."""
    result = styler.to_html(exclude_styles=True, doctype_html=True)
    # The literal below is compared for exact equality, whitespace included.
    expected = dedent(
        """\
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
</head>
<body>
<table>
<thead>
<tr>
<th >&nbsp;</th>
<th >A</th>
</tr>
</thead>
<tbody>
<tr>
<th >a</th>
<td >2.610000</td>
</tr>
<tr>
<th >b</th>
<td >2.690000</td>
</tr>
</tbody>
</table>
</body>
</html>
"""
    )
    assert result == expected
def test_w3_html_format(styler):
    """Full HTML output when uuid, table styles, cell styles, table attributes,
    td classes, a formatter and a caption are all set on one Styler."""
    # Each setter is chained on the result of the previous one.
    styler.set_uuid("").set_table_styles(
        [{"selector": "th", "props": "att2:v2;"}]
    ).applymap(lambda x: "att1:v1;").set_table_attributes(
        'class="my-cls1" style="attr3:v3;"'
    ).set_td_classes(
        DataFrame(["my-cls2"], index=["a"], columns=["A"])
    ).format(
        "{:.1f}"
    ).set_caption(
        "A comprehensive test"
    )
    # The literal below is compared for exact equality, whitespace included.
    expected = dedent(
        """\
<style type="text/css">
#T_ th {
att2: v2;
}
#T__row0_col0, #T__row1_col0 {
att1: v1;
}
</style>
<table id="T_" class="my-cls1" style="attr3:v3;">
<caption>A comprehensive test</caption>
<thead>
<tr>
<th class="blank level0" >&nbsp;</th>
<th id="T__level0_col0" class="col_heading level0 col0" >A</th>
</tr>
</thead>
<tbody>
<tr>
<th id="T__level0_row0" class="row_heading level0 row0" >a</th>
<td id="T__row0_col0" class="data row0 col0 my-cls2" >2.6</td>
</tr>
<tr>
<th id="T__level0_row1" class="row_heading level0 row1" >b</th>
<td id="T__row1_col0" class="data row1 col0" >2.7</td>
</tr>
</tbody>
</table>
"""
    )
    assert expected == styler.to_html()
def test_colspan_w3():
    """A repeated level-0 column label is rendered once with ``colspan="2"``."""
    # GH 36223
    frame = DataFrame(data=[[1, 2]], columns=[["l0", "l0"], ["l1a", "l1b"]])
    html = Styler(frame, uuid="_", cell_ids=False).to_html()
    assert '<th class="col_heading level0 col0" colspan="2">l0</th>' in html
def test_rowspan_w3():
    """The row-heading counterpart of the colspan check, expecting ``rowspan="2"``."""
    # GH 38533
    frame = DataFrame(data=[[1, 2]], index=[["l0", "l0"], ["l1a", "l1b"]])
    html = Styler(frame, uuid="_", cell_ids=False).to_html()
    assert '<th class="row_heading level0 row0" rowspan="2">l0</th>' in html
def test_styles(styler):
    """Full HTML document produced with a fixed uuid and one table style."""
    styler.set_uuid("abc")
    styler.set_table_styles([{"selector": "td", "props": "color: red;"}])
    result = styler.to_html(doctype_html=True)
    # The literal below is compared for exact equality, whitespace included.
    expected = dedent(
        """\
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<style type="text/css">
#T_abc td {
color: red;
}
</style>
</head>
<body>
<table id="T_abc">
<thead>
<tr>
<th class="blank level0" >&nbsp;</th>
<th id="T_abc_level0_col0" class="col_heading level0 col0" >A</th>
</tr>
</thead>
<tbody>
<tr>
<th id="T_abc_level0_row0" class="row_heading level0 row0" >a</th>
<td id="T_abc_row0_col0" class="data row0 col0" >2.610000</td>
</tr>
<tr>
<th id="T_abc_level0_row1" class="row_heading level0 row1" >b</th>
<td id="T_abc_row1_col0" class="data row1 col0" >2.690000</td>
</tr>
</tbody>
</table>
</body>
</html>
"""
    )
    assert result == expected
def test_doctype(styler):
    """``doctype_html=False`` omits the document-level wrapper tags."""
    html = styler.to_html(doctype_html=False)
    for wrapper_tag in ("<html>", "<body>", "<!DOCTYPE html>", "<head>"):
        assert wrapper_tag not in html
def test_doctype_encoding(styler):
    """The meta charset follows the option unless ``encoding`` is passed."""
    with option_context("styler.render.encoding", "ASCII"):
        from_option = styler.to_html(doctype_html=True)
        assert '<meta charset="ASCII">' in from_option

        from_argument = styler.to_html(doctype_html=True, encoding="ANSI")
        assert '<meta charset="ANSI">' in from_argument
def test_bold_headers_arg(styler):
    """The bold ``th`` rule appears only when ``bold_headers=True`` is passed."""
    bold_rule = "th {\n font-weight: bold;\n}"
    with_bold = styler.to_html(bold_headers=True)
    assert bold_rule in with_bold
    without_bold = styler.to_html()
    assert bold_rule not in without_bold
def test_caption_arg(styler):
    """A caption passed to ``to_html`` is rendered for that call only."""
    caption_tag = "<caption>foo bar</caption>"
    with_caption = styler.to_html(caption="foo bar")
    assert caption_tag in with_caption
    without_caption = styler.to_html()
    assert caption_tag not in without_caption
def test_block_names(tpl_style, tpl_table):
    """Both templates expose exactly the expected set of Jinja blocks."""
    # catch accidental removal of a block
    style_blocks = {
        "before_style",
        "style",
        "table_styles",
        "before_cellstyle",
        "cellstyle",
    }
    table_blocks = {
        "before_table",
        "table",
        "caption",
        "thead",
        "tbody",
        "after_table",
        "before_head_rows",
        "head_tr",
        "after_head_rows",
        "before_rows",
        "tr",
        "after_rows",
    }
    assert set(tpl_style.blocks) == style_blocks
    assert set(tpl_table.blocks) == table_blocks
def test_from_custom_template_table(tmpdir):
    """``from_custom_template`` builds a Styler subclass using a custom table template."""
    # Write a template that extends the stock table template and adds a title.
    p = tmpdir.mkdir("tpl").join("myhtml_table.tpl")
    p.write(
        dedent(
            """\
{% extends "html_table.tpl" %}
{% block table %}
<h1>{{custom_title}}</h1>
{{ super() }}
{% endblock table %}"""
        )
    )
    result = Styler.from_custom_template(str(tmpdir.join("tpl")), "myhtml_table.tpl")
    # The returned class must not share its environment or template with Styler.
    assert issubclass(result, Styler)
    assert result.env is not Styler.env
    assert result.template_html_table is not Styler.template_html_table
    styler = result(DataFrame({"A": [1, 2]}))
    # ``custom_title`` is passed to ``to_html`` and expected in the rendered heading.
    assert "<h1>My Title</h1>\n\n\n<table" in styler.to_html(custom_title="My Title")
def test_from_custom_template_style(tmpdir):
    """``from_custom_template`` builds a Styler subclass using a custom style template."""
    # Write a template that extends the stock style template and adds a <link>.
    p = tmpdir.mkdir("tpl").join("myhtml_style.tpl")
    p.write(
        dedent(
            """\
{% extends "html_style.tpl" %}
{% block style %}
<link rel="stylesheet" href="mystyle.css">
{{ super() }}
{% endblock style %}"""
        )
    )
    result = Styler.from_custom_template(
        str(tmpdir.join("tpl")), html_style="myhtml_style.tpl"
    )
    # The returned class must not share its environment or template with Styler.
    assert issubclass(result, Styler)
    assert result.env is not Styler.env
    assert result.template_html_style is not Styler.template_html_style
    styler = result(DataFrame({"A": [1, 2]}))
    assert '<link rel="stylesheet" href="mystyle.css">\n\n<style' in styler.to_html()
def test_caption_as_sequence(styler):
    """With a tuple caption, the first element is the one rendered in HTML."""
    captions = ("full cap", "short cap")
    styler.set_caption(captions)
    html = styler.to_html()
    assert "<caption>full cap</caption>" in html
@pytest.mark.parametrize("index", [False, True])
@pytest.mark.parametrize("columns", [False, True])
@pytest.mark.parametrize("index_name", [True, False])
def test_sticky_basic(styler, index, columns, index_name):
    """``set_sticky`` adds the expected CSS rules for a single-level frame."""
    if index_name:
        styler.index.name = "some text"
    for requested, axis in ((index, 0), (columns, 1)):
        if requested:
            styler.set_sticky(axis=axis)

    left_css = (
        "#T_ {0} {{\n position: sticky;\n background-color: white;\n"
        " left: 0px;\n z-index: {1};\n}}"
    )
    top_css = (
        "#T_ {0} {{\n position: sticky;\n background-color: white;\n"
        " top: {1}px;\n z-index: {2};\n{3}}}"
    )

    res = styler.set_uuid("").to_html()

    # test index stickys over thead and tbody
    head_rule = left_css.format("thead tr th:nth-child(1)", "3 !important")
    body_rule = left_css.format("tbody tr th:nth-child(1)", "1")
    assert (head_rule in res) is index
    assert (body_rule in res) is index

    # test column stickys including if name row
    named_first = top_css.format(
        "thead tr:nth-child(1) th", "0", "2", " height: 25px;\n"
    )
    named_second = top_css.format(
        "thead tr:nth-child(2) th", "25", "2", " height: 25px;\n"
    )
    unnamed_first = top_css.format("thead tr:nth-child(1) th", "0", "2", "")
    assert (named_first in res) is (columns and index_name)
    assert (named_second in res) is (columns and index_name)
    assert (unnamed_first in res) is (columns and not index_name)
@pytest.mark.parametrize("index", [False, True])
@pytest.mark.parametrize("columns", [False, True])
def test_sticky_mi(styler_mi, index, columns):
    """``set_sticky`` adds the expected CSS rules for every MultiIndex level."""
    for requested, axis in ((index, 0), (columns, 1)):
        if requested:
            styler_mi.set_sticky(axis=axis)

    left_css = (
        "#T_ {0} {{\n position: sticky;\n background-color: white;\n"
        " left: {1}px;\n min-width: 75px;\n max-width: 75px;\n z-index: {2};\n}}"
    )
    top_css = (
        "#T_ {0} {{\n position: sticky;\n background-color: white;\n"
        " top: {1}px;\n height: 25px;\n z-index: {2};\n}}"
    )

    res = styler_mi.set_uuid("").to_html()

    # test the index stickys for thead and tbody over both levels
    index_rules = [
        left_css.format("thead tr th:nth-child(1)", "0", "3 !important"),
        left_css.format("tbody tr th.level0", "0", "1"),
        left_css.format("thead tr th:nth-child(2)", "75", "3 !important"),
        left_css.format("tbody tr th.level1", "75", "1"),
    ]
    for rule in index_rules:
        assert (rule in res) is index

    # test the column stickys for each level row
    column_rules = [
        top_css.format("thead tr:nth-child(1) th", "0", "2"),
        top_css.format("thead tr:nth-child(2) th", "25", "2"),
    ]
    for rule in column_rules:
        assert (rule in res) is columns
@pytest.mark.parametrize("index", [False, True])
@pytest.mark.parametrize("columns", [False, True])
@pytest.mark.parametrize("levels", [[1], ["one"], "one"])
def test_sticky_levels(styler_mi, index, columns, levels):
    """``levels`` restricts sticky CSS to level 1, given by position or name."""
    styler_mi.index.names = ["zero", "one"]
    styler_mi.columns.names = ["zero", "one"]
    for requested, axis in ((index, 0), (columns, 1)):
        if requested:
            styler_mi.set_sticky(axis=axis, levels=levels)

    left_css = (
        "#T_ {0} {{\n position: sticky;\n background-color: white;\n"
        " left: {1}px;\n min-width: 75px;\n max-width: 75px;\n z-index: {2};\n}}"
    )
    top_css = (
        "#T_ {0} {{\n position: sticky;\n background-color: white;\n"
        " top: {1}px;\n height: 25px;\n z-index: {2};\n}}"
    )

    res = styler_mi.set_uuid("").to_html()

    # test no sticking of level0
    for level0_selector in (
        "#T_ thead tr th:nth-child(1)",
        "#T_ tbody tr th.level0",
        "#T_ thead tr:nth-child(1) th",
    ):
        assert level0_selector not in res

    # test sticking level1
    head_rule = left_css.format("thead tr th:nth-child(2)", "0", "3 !important")
    body_rule = left_css.format("tbody tr th.level1", "0", "1")
    column_rule = top_css.format("thead tr:nth-child(2) th", "0", "2")
    assert (head_rule in res) is index
    assert (body_rule in res) is index
    assert (column_rule in res) is columns
def test_sticky_raises(styler):
    """An unknown ``axis`` passed to ``set_sticky`` raises ``ValueError``."""
    msg = "No axis named bad for object type DataFrame"
    with pytest.raises(ValueError, match=msg):
        styler.set_sticky(axis="bad")
@pytest.mark.parametrize(
    "sparse_index, sparse_columns",
    [(True, True), (True, False), (False, True), (False, False)],
)
def test_sparse_options(sparse_index, sparse_columns):
    """Sparsification set via options and via ``to_html`` arguments agree."""
    cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")])
    ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
    frame = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=ridx, columns=cidx)
    styled = frame.style

    default_html = styled.to_html()  # defaults under pd.options to (True , True)

    with option_context(
        "styler.sparse.index", sparse_index, "styler.sparse.columns", sparse_columns
    ):
        via_options = styled.to_html()
        both_sparse = sparse_index and sparse_columns
        assert (via_options == default_html) is both_sparse

    via_arguments = styled.to_html(
        sparse_index=sparse_index, sparse_columns=sparse_columns
    )
    assert via_options == via_arguments
@pytest.mark.parametrize("index", [True, False])
@pytest.mark.parametrize("columns", [True, False])
def test_applymap_header_cell_ids(styler, index, columns):
    """With ``cell_ids=False`` only styled header cells are rendered with ids."""
    # GH 41893

    def func(v):
        return "attr: val;"

    styler.uuid = ""
    styler.cell_ids = False
    if index:
        styler.applymap_index(func, axis="index")
    if columns:
        styler.applymap_index(func, axis="columns")

    result = styler.to_html()

    # test no data cell ids
    for data_cell in (
        '<td class="data row0 col0" >2.610000</td>',
        '<td class="data row1 col0" >2.690000</td>',
    ):
        assert data_cell in result

    # test index header ids where needed and css styles
    row0_header = '<th id="T__level0_row0" class="row_heading level0 row0" >a</th>'
    row1_header = '<th id="T__level0_row1" class="row_heading level0 row1" >b</th>'
    index_css = "#T__level0_row0, #T__level0_row1 {\n attr: val;\n}"
    assert (row0_header in result) is index
    assert (row1_header in result) is index
    assert (index_css in result) is index

    # test column header ids where needed and css styles
    col0_header = '<th id="T__level0_col0" class="col_heading level0 col0" >A</th>'
    columns_css = "#T__level0_col0 {\n attr: val;\n}"
    assert (col0_header in result) is columns
    assert (columns_css in result) is columns
@pytest.mark.parametrize("rows", [True, False])
@pytest.mark.parametrize("cols", [True, False])
def test_maximums(styler_mi, rows, cols):
    """``max_rows`` / ``max_columns`` trim the rendered table on the matching axis."""
    row_limit = 2 if rows else None
    column_limit = 2 if cols else None
    result = styler_mi.to_html(max_rows=row_limit, max_columns=column_limit)

    # [[0,1], [4,5]] always visible
    assert ">5</td>" in result
    # first trimmed vertical element
    assert (">8</td>" in result) is not rows
    # first trimmed horizontal element
    assert (">2</td>" in result) is not cols
def test_replaced_css_class_names(styler_mi):
    """Custom ``css_class_names`` replace the default class names in HTML and CSS ids."""
    # "col_heading", "col" and "col_trim" are not overridden here, so the
    # expected HTML below keeps their default names.
    css = {
        "row_heading": "ROWHEAD",
        "index_name": "IDXNAME",
        "row": "ROW",
        "row_trim": "ROWTRIM",
        "level": "LEVEL",
        "data": "DATA",
        "blank": "BLANK",
    }
    midx = MultiIndex.from_product([["a", "b"], ["c", "d"]])
    frame = DataFrame(np.arange(16).reshape(4, 4), index=midx, columns=midx)

    # the fixture argument is replaced by a styler built for this test
    styler_mi = Styler(frame, uuid_len=0).set_table_styles(css_class_names=css)
    styler_mi.index.names = ["n1", "n2"]

    # keep only the first row and first column visible
    styler_mi.hide(styler_mi.index[1:], axis=0)
    styler_mi.hide(styler_mi.columns[1:], axis=1)

    # style the index, the column headers and the data cells
    styler_mi.applymap_index(lambda v: "color: red;", axis=0)
    styler_mi.applymap_index(lambda v: "color: green;", axis=1)
    styler_mi.applymap(lambda v: "color: blue;")

    expected = dedent(
        """\
        <style type="text/css">
        #T__ROW0_col0 {
        color: blue;
        }
        #T__LEVEL0_ROW0, #T__LEVEL1_ROW0 {
        color: red;
        }
        #T__LEVEL0_col0, #T__LEVEL1_col0 {
        color: green;
        }
        </style>
        <table id="T_">
        <thead>
        <tr>
        <th class="BLANK" >&nbsp;</th>
        <th class="IDXNAME LEVEL0" >n1</th>
        <th id="T__LEVEL0_col0" class="col_heading LEVEL0 col0" >a</th>
        </tr>
        <tr>
        <th class="BLANK" >&nbsp;</th>
        <th class="IDXNAME LEVEL1" >n2</th>
        <th id="T__LEVEL1_col0" class="col_heading LEVEL1 col0" >c</th>
        </tr>
        <tr>
        <th class="IDXNAME LEVEL0" >n1</th>
        <th class="IDXNAME LEVEL1" >n2</th>
        <th class="BLANK col0" >&nbsp;</th>
        </tr>
        </thead>
        <tbody>
        <tr>
        <th id="T__LEVEL0_ROW0" class="ROWHEAD LEVEL0 ROW0" >a</th>
        <th id="T__LEVEL1_ROW0" class="ROWHEAD LEVEL1 ROW0" >c</th>
        <td id="T__ROW0_col0" class="DATA ROW0 col0" >0</td>
        </tr>
        </tbody>
        </table>
        """
    )
    rendered = styler_mi.to_html()
    assert rendered == expected
def test_include_css_style_rules_only_for_visible_cells(styler_mi):
    """Only the data cell left visible after hiding gets a CSS rule."""
    # GH 43619
    styled = styler_mi.set_uuid("").applymap(lambda v: "color: blue;")
    # hide everything except the first column and the first row
    styled = styled.hide(styler_mi.data.columns[1:], axis="columns")
    styled = styled.hide(styler_mi.data.index[1:], axis="index")
    result = styled.to_html()

    expected_styles = dedent(
        """\
        <style type="text/css">
        #T__row0_col0 {
        color: blue;
        }
        </style>
        """
    )
    assert expected_styles in result
def test_include_css_style_rules_only_for_visible_index_labels(styler_mi):
    """Only index labels of the row left visible after hiding get a CSS rule."""
    # GH 43619
    styled = styler_mi.set_uuid("").applymap_index(
        lambda v: "color: blue;", axis="index"
    )
    # hide every column and all rows but the first
    styled = styled.hide(styler_mi.data.columns, axis="columns")
    styled = styled.hide(styler_mi.data.index[1:], axis="index")
    result = styled.to_html()

    expected_styles = dedent(
        """\
        <style type="text/css">
        #T__level0_row0, #T__level1_row0 {
        color: blue;
        }
        </style>
        """
    )
    assert expected_styles in result
def test_include_css_style_rules_only_for_visible_column_labels(styler_mi):
    """Only column labels of the column left visible after hiding get a CSS rule."""
    # GH 43619
    styled = styler_mi.set_uuid("").applymap_index(
        lambda v: "color: blue;", axis="columns"
    )
    # hide all columns but the first, and every row
    styled = styled.hide(styler_mi.data.columns[1:], axis="columns")
    styled = styled.hide(styler_mi.data.index, axis="index")
    result = styled.to_html()

    expected_styles = dedent(
        """\
        <style type="text/css">
        #T__level0_col0, #T__level1_col0 {
        color: blue;
        }
        </style>
        """
    )
    assert expected_styles in result
def test_hiding_index_columns_multiindex_alignment():
    """Hiding MultiIndex levels together with individual rows/columns renders as expected."""
    # gh 43644
    row_index = MultiIndex.from_product(
        [["i0", "j0"], ["i1"], ["i2", "j2"]], names=["i-0", "i-1", "i-2"]
    )
    col_index = MultiIndex.from_product(
        [["c0"], ["c1", "d1"], ["c2", "d2"]], names=["c-0", "c-1", "c-2"]
    )
    frame = DataFrame(
        np.arange(16).reshape(4, 4), index=row_index, columns=col_index
    )
    styler = Styler(frame, uuid_len=0)

    # hide one whole level on each axis, then one specific row and column
    styler.hide(level=1, axis=0).hide(level=0, axis=1)
    styler.hide([("j0", "i1", "j2")], axis=0)
    styler.hide([("c0", "d1", "d2")], axis=1)
    result = styler.to_html()

    expected = dedent(
        """\
        <style type="text/css">
        </style>
        <table id="T_">
        <thead>
        <tr>
        <th class="blank" >&nbsp;</th>
        <th class="index_name level1" >c-1</th>
        <th id="T__level1_col0" class="col_heading level1 col0" colspan="2">c1</th>
        <th id="T__level1_col2" class="col_heading level1 col2" >d1</th>
        </tr>
        <tr>
        <th class="blank" >&nbsp;</th>
        <th class="index_name level2" >c-2</th>
        <th id="T__level2_col0" class="col_heading level2 col0" >c2</th>
        <th id="T__level2_col1" class="col_heading level2 col1" >d2</th>
        <th id="T__level2_col2" class="col_heading level2 col2" >c2</th>
        </tr>
        <tr>
        <th class="index_name level0" >i-0</th>
        <th class="index_name level2" >i-2</th>
        <th class="blank col0" >&nbsp;</th>
        <th class="blank col1" >&nbsp;</th>
        <th class="blank col2" >&nbsp;</th>
        </tr>
        </thead>
        <tbody>
        <tr>
        <th id="T__level0_row0" class="row_heading level0 row0" rowspan="2">i0</th>
        <th id="T__level2_row0" class="row_heading level2 row0" >i2</th>
        <td id="T__row0_col0" class="data row0 col0" >0</td>
        <td id="T__row0_col1" class="data row0 col1" >1</td>
        <td id="T__row0_col2" class="data row0 col2" >2</td>
        </tr>
        <tr>
        <th id="T__level2_row1" class="row_heading level2 row1" >j2</th>
        <td id="T__row1_col0" class="data row1 col0" >4</td>
        <td id="T__row1_col1" class="data row1 col1" >5</td>
        <td id="T__row1_col2" class="data row1 col2" >6</td>
        </tr>
        <tr>
        <th id="T__level0_row2" class="row_heading level0 row2" >j0</th>
        <th id="T__level2_row2" class="row_heading level2 row2" >i2</th>
        <td id="T__row2_col0" class="data row2 col0" >8</td>
        <td id="T__row2_col1" class="data row2 col1" >9</td>
        <td id="T__row2_col2" class="data row2 col2" >10</td>
        </tr>
        </tbody>
        </table>
        """
    )
    assert result == expected
def test_hiding_index_columns_multiindex_trimming():
    """Hidden rows/columns combined with ``max_rows``/``max_columns`` render as expected."""
    # gh 44272
    level_values = [[0, 1, 2, 3], [0, 1]]
    df = DataFrame(np.arange(64).reshape(8, 8))
    df.columns = MultiIndex.from_product(level_values)
    df.index = MultiIndex.from_product(level_values)
    df.index.names = ["a", "b"]
    df.columns.names = ["c", "d"]

    styler = Styler(df, cell_ids=False, uuid_len=0)
    # the same three leading keys are hidden on both axes
    hidden_keys = ((0, 0), (0, 1), (1, 0))
    styler.hide(list(hidden_keys), axis=1).hide(list(hidden_keys), axis=0)

    with option_context("styler.render.max_rows", 4, "styler.render.max_columns", 4):
        result = styler.to_html()

    expected = dedent(
        """\
        <style type="text/css">
        </style>
        <table id="T_">
        <thead>
        <tr>
        <th class="blank" >&nbsp;</th>
        <th class="index_name level0" >c</th>
        <th class="col_heading level0 col3" >1</th>
        <th class="col_heading level0 col4" colspan="2">2</th>
        <th class="col_heading level0 col6" >3</th>
        </tr>
        <tr>
        <th class="blank" >&nbsp;</th>
        <th class="index_name level1" >d</th>
        <th class="col_heading level1 col3" >1</th>
        <th class="col_heading level1 col4" >0</th>
        <th class="col_heading level1 col5" >1</th>
        <th class="col_heading level1 col6" >0</th>
        <th class="col_heading level1 col_trim" >...</th>
        </tr>
        <tr>
        <th class="index_name level0" >a</th>
        <th class="index_name level1" >b</th>
        <th class="blank col3" >&nbsp;</th>
        <th class="blank col4" >&nbsp;</th>
        <th class="blank col5" >&nbsp;</th>
        <th class="blank col6" >&nbsp;</th>
        <th class="blank col7 col_trim" >&nbsp;</th>
        </tr>
        </thead>
        <tbody>
        <tr>
        <th class="row_heading level0 row3" >1</th>
        <th class="row_heading level1 row3" >1</th>
        <td class="data row3 col3" >27</td>
        <td class="data row3 col4" >28</td>
        <td class="data row3 col5" >29</td>
        <td class="data row3 col6" >30</td>
        <td class="data row3 col_trim" >...</td>
        </tr>
        <tr>
        <th class="row_heading level0 row4" rowspan="2">2</th>
        <th class="row_heading level1 row4" >0</th>
        <td class="data row4 col3" >35</td>
        <td class="data row4 col4" >36</td>
        <td class="data row4 col5" >37</td>
        <td class="data row4 col6" >38</td>
        <td class="data row4 col_trim" >...</td>
        </tr>
        <tr>
        <th class="row_heading level1 row5" >1</th>
        <td class="data row5 col3" >43</td>
        <td class="data row5 col4" >44</td>
        <td class="data row5 col5" >45</td>
        <td class="data row5 col6" >46</td>
        <td class="data row5 col_trim" >...</td>
        </tr>
        <tr>
        <th class="row_heading level0 row6" >3</th>
        <th class="row_heading level1 row6" >0</th>
        <td class="data row6 col3" >51</td>
        <td class="data row6 col4" >52</td>
        <td class="data row6 col5" >53</td>
        <td class="data row6 col6" >54</td>
        <td class="data row6 col_trim" >...</td>
        </tr>
        <tr>
        <th class="row_heading level0 row_trim" >...</th>
        <th class="row_heading level1 row_trim" >...</th>
        <td class="data col3 row_trim" >...</td>
        <td class="data col4 row_trim" >...</td>
        <td class="data col5 row_trim" >...</td>
        <td class="data col6 row_trim" >...</td>
        <td class="data row_trim col_trim" >...</td>
        </tr>
        </tbody>
        </table>
        """
    )
    assert result == expected
@pytest.mark.parametrize("type", ["data", "index"])
@pytest.mark.parametrize(
    "text, exp, found",
    [
        ("no link, just text", False, ""),
        ("subdomain not www: sub.web.com", False, ""),
        ("www subdomain: www.web.com other", True, "www.web.com"),
        ("scheme full structure: http://www.web.com", True, "http://www.web.com"),
        ("scheme no top-level: http://www.web", True, "http://www.web"),
        ("no scheme, no top-level: www.web", False, "www.web"),
        ("https scheme: https://www.web.com", True, "https://www.web.com"),
        ("ftp scheme: ftp://www.web", True, "ftp://www.web"),
        ("ftps scheme: ftps://www.web", True, "ftps://www.web"),
        ("subdirectories: www.web.com/directory", True, "www.web.com/directory"),
        ("Multiple domains: www.1.2.3.4", True, "www.1.2.3.4"),
        ("with port: http://web.com:80", True, "http://web.com:80"),
        (
            "full net_loc scheme: http://user:pass@web.com",
            True,
            "http://user:pass@web.com",
        ),
        (
            "with valid special chars: http://web.com/,.':;~!@#$*()[]",
            True,
            "http://web.com/,.':;~!@#$*()[]",
        ),
    ],
)
def test_rendered_links(type, text, exp, found):
    """``hyperlinks="html"`` wraps detected URLs in anchor tags for data and index cells."""
    if type == "data":
        # the text is a data value
        styler = DataFrame([text]).style.format(hyperlinks="html")
    else:
        # the text is an index label
        styler = DataFrame([0], index=[text]).style.format_index(hyperlinks="html")

    anchor = f'<a href="{found}" target="_blank">{found}</a>'
    html = styler.to_html()

    assert (anchor in html) is exp
    # test conversion done when expected and not
    assert (text in html) is not exp
def test_multiple_rendered_links():
    """Every URL in a cell is converted to an anchor; plain words are left alone."""
    links = ("www.a.b", "http://a.c", "https://a.d", "ftp://a.e")
    cell_text = "text {} {} text {} {}".format(*links)
    html = DataFrame([cell_text]).style.format(hyperlinks="html").to_html()

    anchor_template = '<a href="{0}" target="_blank">{0}</a>'
    for url in links:
        assert anchor_template.format(url) in html
    assert anchor_template.format("text") not in html

View File

@@ -0,0 +1,286 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
IndexSlice,
Series,
)
pytest.importorskip("matplotlib")
pytest.importorskip("jinja2")
import matplotlib as mpl
from pandas.io.formats.style import Styler
@pytest.fixture
def df():
    """2x2 integer frame with columns ``A`` and ``B``."""
    values = [[1, 2], [2, 4]]
    return DataFrame(values, columns=["A", "B"])
@pytest.fixture
def styler(df):
    """Styler over the ``df`` fixture, created with ``uuid_len=0``."""
    styled = Styler(df, uuid_len=0)
    return styled
@pytest.fixture
def df_blank():
    """2x2 all-zero frame with columns ``A``/``B`` and index ``X``/``Y``."""
    zeros = [[0, 0], [0, 0]]
    return DataFrame(zeros, columns=["A", "B"], index=["X", "Y"])
@pytest.fixture
def styler_blank(df_blank):
    """Styler over the ``df_blank`` fixture, created with ``uuid_len=0``."""
    styled = Styler(df_blank, uuid_len=0)
    return styled
@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"])
def test_function_gradient(styler, f):
    """Both gradient methods emit hex colours and treat both columns alike."""
    for c_map in (None, "YlOrRd"):
        ctx = getattr(styler, f)(cmap=c_map)._compute().ctx
        # the value of the first property of every cell contains a '#'
        assert all("#" in props[0][1] for props in ctx.values())
        # cells in the same row receive the same style in either column
        for row in (0, 1):
            assert ctx[(row, 0)] == ctx[(row, 1)]
@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"])
def test_background_gradient_color(styler, f):
    """A single-cell subset gets the expected properties from either gradient method."""
    expected_by_method = {
        "background_gradient": [
            ("background-color", "#fff7fb"),
            ("color", "#000000"),
        ],
        "text_gradient": [("color", "#fff7fb")],
    }
    result = getattr(styler, f)(subset=IndexSlice[1, "A"])._compute().ctx
    if f in expected_by_method:
        assert result[(1, 0)] == expected_by_method[f]
@pytest.mark.parametrize(
    "axis, expected",
    [
        (0, ["low", "low", "high", "high"]),
        (1, ["low", "high", "low", "high"]),
        (None, ["low", "mid", "mid", "high"]),
    ],
)
@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"])
def test_background_gradient_axis(styler, axis, expected, f):
    """The "Blues" gradient is scaled per column, per row or over the whole table."""
    if f == "background_gradient":
        colors = {
            "low": [("background-color", "#f7fbff"), ("color", "#000000")],
            "mid": [("background-color", "#abd0e6"), ("color", "#000000")],
            "high": [("background-color", "#08306b"), ("color", "#f1f1f1")],
        }
    elif f == "text_gradient":
        colors = {
            "low": [("color", "#f7fbff")],
            "mid": [("color", "#abd0e6")],
            "high": [("color", "#08306b")],
        }

    result = getattr(styler, f)(cmap="Blues", axis=axis)._compute().ctx

    # ``expected`` lists the colour level of each cell in row-major order
    cells = [(0, 0), (0, 1), (1, 0), (1, 1)]
    for position, cell in enumerate(cells):
        level = expected[position]
        assert result[cell] == colors[level]
@pytest.mark.parametrize(
    "cmap, expected",
    [
        (
            "PuBu",
            {
                (4, 5): [("background-color", "#86b0d3"), ("color", "#000000")],
                (4, 6): [("background-color", "#83afd3"), ("color", "#f1f1f1")],
            },
        ),
        (
            "YlOrRd",
            {
                (4, 8): [("background-color", "#fd913e"), ("color", "#000000")],
                (4, 9): [("background-color", "#fd8f3d"), ("color", "#f1f1f1")],
            },
        ),
        (
            None,
            {
                (7, 0): [("background-color", "#48c16e"), ("color", "#f1f1f1")],
                (7, 1): [("background-color", "#4cc26c"), ("color", "#000000")],
            },
        ),
    ],
)
def test_text_color_threshold(cmap, expected):
    """Text colour switches between adjacent cells at the expected point per colormap."""
    # GH 39888
    frame = DataFrame(np.arange(100).reshape(10, 10))
    ctx = frame.style.background_gradient(cmap=cmap, axis=None)._compute().ctx
    for cell, props in expected.items():
        assert ctx[cell] == props
def test_background_gradient_vmin_vmax():
    """Values outside ``[vmin, vmax]`` are styled like the nearest bound."""
    # GH 12145
    frame = DataFrame(range(5))
    styled = frame.style.background_gradient(vmin=1, vmax=3)
    ctx = styled._compute().ctx
    # 0 is below vmin=1 and 4 is above vmax=3
    assert ctx[(0, 0)] == ctx[(1, 0)]
    assert ctx[(4, 0)] == ctx[(3, 0)]
def test_background_gradient_int64():
    """An ``Int64`` column is styled the same as the default integer column."""
    # GH 28869
    plain = Series(range(3)).to_frame()
    nullable = Series(range(3), dtype="Int64").to_frame()
    plain_ctx = plain.style.background_gradient()._compute().ctx
    nullable_ctx = nullable.style.background_gradient()._compute().ctx
    for cell in ((0, 0), (1, 0), (2, 0)):
        assert nullable_ctx[cell] == plain_ctx[cell]
@pytest.mark.parametrize(
    "axis, gmap, expected",
    [
        (
            0,
            [1, 2],
            {
                (0, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
                (1, 0): [("background-color", "#023858"), ("color", "#f1f1f1")],
                (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")],
                (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
            },
        ),
        (
            1,
            [1, 2],
            {
                (0, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
                (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
                (0, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
                (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
            },
        ),
        (
            None,
            np.array([[2, 1], [1, 2]]),
            {
                (0, 0): [("background-color", "#023858"), ("color", "#f1f1f1")],
                (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
                (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")],
                (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
            },
        ),
    ],
)
def test_background_gradient_gmap_array(styler_blank, axis, gmap, expected):
    """A ``gmap`` given as a list or ndarray drives the colours for every axis setting."""
    # tests when gmap is given as a sequence and converted to ndarray
    styled = styler_blank.background_gradient(axis=axis, gmap=gmap)
    ctx = styled._compute().ctx
    assert ctx == expected
@pytest.mark.parametrize(
    "gmap, axis", [([1, 2, 3], 0), ([1, 2], 1), (np.array([[1, 2], [1, 2]]), None)]
)
def test_background_gradient_gmap_array_raises(gmap, axis):
    """A ``gmap`` whose shape does not fit the 2x3 data raises ``ValueError``."""
    # test when gmap as converted ndarray is bad shape
    frame = DataFrame([[0, 0, 0], [0, 0, 0]])
    expected_msg = "supplied 'gmap' is not correct shape"
    with pytest.raises(ValueError, match=expected_msg):
        frame.style.background_gradient(gmap=gmap, axis=axis)._compute()
@pytest.mark.parametrize(
    "gmap",
    [
        DataFrame(  # reverse the columns
            [[2, 1], [1, 2]], columns=["B", "A"], index=["X", "Y"]
        ),
        DataFrame(  # reverse the index
            [[2, 1], [1, 2]], columns=["A", "B"], index=["Y", "X"]
        ),
        DataFrame(  # reverse the index and columns
            [[1, 2], [2, 1]], columns=["B", "A"], index=["Y", "X"]
        ),
        DataFrame(  # add unnecessary columns
            [[1, 2, 3], [2, 1, 3]], columns=["A", "B", "C"], index=["X", "Y"]
        ),
        DataFrame(  # add unnecessary index
            [[1, 2], [2, 1], [3, 3]], columns=["A", "B"], index=["X", "Y", "Z"]
        ),
    ],
)
@pytest.mark.parametrize(
    "subset, exp_gmap",  # exp_gmap is underlying map DataFrame should conform to
    [
        (None, [[1, 2], [2, 1]]),
        (["A"], [[1], [2]]),  # slice only column "A" in data and gmap
        (["B", "A"], [[2, 1], [1, 2]]),  # reverse the columns in data
        (IndexSlice["X", :], [[1, 2]]),  # slice only index "X" in data and gmap
        (IndexSlice[["Y", "X"], :], [[2, 1], [1, 2]]),  # reverse the index in data
    ],
)
def test_background_gradient_gmap_dataframe_align(styler_blank, gmap, subset, exp_gmap):
    """A DataFrame ``gmap`` is aligned by label to the (optionally subset) data."""
    # test gmap given as DataFrame that it aligns to the data including subset
    via_array = styler_blank.background_gradient(
        axis=None, gmap=exp_gmap, subset=subset
    )
    via_frame = styler_blank.background_gradient(axis=None, gmap=gmap, subset=subset)
    assert via_array._compute().ctx == via_frame._compute().ctx
@pytest.mark.parametrize(
    "gmap, axis, exp_gmap",
    [
        (Series([2, 1], index=["Y", "X"]), 0, [[1, 1], [2, 2]]),  # reverse the index
        (Series([2, 1], index=["B", "A"]), 1, [[1, 2], [1, 2]]),  # reverse the cols
        (Series([1, 2, 3], index=["X", "Y", "Z"]), 0, [[1, 1], [2, 2]]),  # add idx
        (Series([1, 2, 3], index=["A", "B", "C"]), 1, [[1, 2], [1, 2]]),  # add col
    ],
)
def test_background_gradient_gmap_series_align(styler_blank, gmap, axis, exp_gmap):
    """A Series ``gmap`` is aligned by label along the requested axis."""
    # test gmap given as Series that it aligns to the data including subset
    via_array = styler_blank.background_gradient(axis=None, gmap=exp_gmap)._compute()
    via_series = styler_blank.background_gradient(axis=axis, gmap=gmap)._compute()
    assert via_array.ctx == via_series.ctx
@pytest.mark.parametrize(
    "gmap, axis",
    [
        (DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 1),
        (DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 0),
    ],
)
def test_background_gradient_gmap_wrong_dataframe(styler_blank, gmap, axis):
    """A DataFrame ``gmap`` combined with ``axis`` 0 or 1 raises ``ValueError``."""
    # test giving a gmap in DataFrame but with wrong axis
    expected_msg = (
        "'gmap' is a DataFrame but underlying data for operations is a Series"
    )
    with pytest.raises(ValueError, match=expected_msg):
        styler_blank.background_gradient(gmap=gmap, axis=axis)._compute()
def test_background_gradient_gmap_wrong_series(styler_blank):
    """A Series ``gmap`` combined with ``axis=None`` raises ``ValueError``."""
    # test giving a gmap in Series form but with wrong axis
    series_gmap = Series([1, 2], index=["X", "Y"])
    expected_msg = (
        "'gmap' is a Series but underlying data for operations is a DataFrame"
    )
    with pytest.raises(ValueError, match=expected_msg):
        styler_blank.background_gradient(gmap=series_gmap, axis=None)._compute()
@pytest.mark.parametrize(
    "cmap",
    [
        "PuBu",
        # ``mpl.cm.get_cmap`` is deprecated and absent from newer matplotlib
        # releases, which would make this module fail at collection time.
        # Prefer the ``mpl.colormaps`` registry when it exists and fall back to
        # the legacy call on older versions.
        (
            mpl.colormaps["PuBu"]
            if hasattr(mpl, "colormaps")
            else mpl.cm.get_cmap("PuBu")
        ),
    ],
)
def test_bar_colormap(cmap):
    """``Styler.bar`` accepts ``cmap`` as a name or a Colormap object.

    Each cell's bar style must contain the expected "PuBu" colour.
    """
    data = DataFrame([[1, 2], [3, 4]])
    ctx = data.style.bar(cmap=cmap, axis=None)._compute().ctx
    pubu_colors = {
        (0, 0): "#d0d1e6",
        (1, 0): "#056faf",
        (0, 1): "#73a9cf",
        (1, 1): "#023858",
    }
    for cell, color in pubu_colors.items():
        # the colour is looked for in the value of the cell's second property
        assert color in ctx[cell][1][1]
def test_bar_color_raises(df):
    """``Styler.bar`` rejects malformed ``color`` values and ``color`` plus ``cmap``."""
    bad_color_msg = "`color` must be string or list or tuple of 2 strings"
    # a set, and a list with three entries, are both invalid
    for bad_color in ({"a", "b"}, ["a", "b", "c"]):
        with pytest.raises(ValueError, match=bad_color_msg):
            df.style.bar(color=bad_color).to_html()

    both_given_msg = "`color` and `cmap` cannot both be given"
    with pytest.raises(ValueError, match=both_given_msg):
        df.style.bar(color="something", cmap="something else").to_html()

View File

@@ -0,0 +1,140 @@
from textwrap import dedent
import pytest
from pandas import (
DataFrame,
IndexSlice,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
@pytest.fixture
def df():
    """3x3 float frame with duplicated index label ``j`` and column label ``d``."""
    values = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    row_labels = ["i", "j", "j"]
    column_labels = ["c", "d", "d"]
    return DataFrame(values, index=row_labels, columns=column_labels, dtype=float)
@pytest.fixture
def styler(df):
    """Styler over the non-unique ``df`` fixture, created with ``uuid_len=0``."""
    styled = Styler(df, uuid_len=0)
    return styled
def test_format_non_unique(df):
    """``Styler.format`` applies to every cell matched by a duplicated label."""
    # GH 41269

    # test dict
    html = df.style.format({"d": "{:.1f}"}).to_html()
    default_precision = ["1.000000<", "4.000000<", "7.000000<"]
    one_decimal = ["2.0<", "3.0<", "5.0<", "6.0<", "8.0<", "9.0<"]
    for val in default_precision + one_decimal:
        assert val in html

    # test subset
    html = df.style.format(precision=1, subset=IndexSlice["j", "d"]).to_html()
    default_precision = [
        "1.000000<",
        "4.000000<",
        "7.000000<",
        "2.000000<",
        "3.000000<",
    ]
    one_decimal = ["5.0<", "6.0<", "8.0<", "9.0<"]
    for val in default_precision + one_decimal:
        assert val in html
@pytest.mark.parametrize("func", ["apply", "applymap"])
def test_apply_applymap_non_unique_raises(df, func):
    """``apply`` and ``applymap`` raise ``KeyError`` on non-unique labels."""
    # GH 41269
    if func == "apply":
        # one style string per element of the Series passed in

        def op(s):
            return ["color: red;"] * len(s)

    else:

        def op(v):
            return "color: red;"

    styling_method = getattr(df.style, func)
    with pytest.raises(KeyError, match="`Styler.apply` and `.applymap` are not"):
        styling_method(op)._compute()
def test_table_styles_dict_non_unique_index(styler):
    """A dict keyed by a duplicated index label yields a style for each matching row."""
    per_label_styles = {"j": [{"selector": "td", "props": "a: v;"}]}
    styler.set_table_styles(per_label_styles, axis=1)
    expected = [
        {"selector": f"td.row{position}", "props": [("a", "v")]}
        for position in (1, 2)
    ]
    assert styler.table_styles == expected
def test_table_styles_dict_non_unique_columns(styler):
    """A dict keyed by a duplicated column label yields a style for each matching column."""
    per_label_styles = {"d": [{"selector": "td", "props": "a: v;"}]}
    styler.set_table_styles(per_label_styles, axis=0)
    expected = [
        {"selector": f"td.col{position}", "props": [("a", "v")]}
        for position in (1, 2)
    ]
    assert styler.table_styles == expected
def test_tooltips_non_unique_raises(styler):
    """``set_tooltips`` accepts unique ``ttips`` labels and rejects duplicated ones."""
    values = [["1", "2"], ["3", "4"]]
    expected_msg = "Tooltips render only if `ttips` has unique"

    # ttips has unique keys
    unique_ttips = DataFrame(values, columns=["c", "d"], index=["a", "b"])
    styler.set_tooltips(ttips=unique_ttips)  # OK

    duplicated_labels = [
        {"columns": ["c", "c"], "index": ["a", "b"]},  # non-unique columns
        {"columns": ["c", "d"], "index": ["a", "a"]},  # non-unique index
    ]
    for labels in duplicated_labels:
        ttips = DataFrame(values, **labels)
        with pytest.raises(KeyError, match=expected_msg):
            styler.set_tooltips(ttips=ttips)
def test_set_td_classes_non_unique_raises(styler):
    """``set_td_classes`` accepts unique ``classes`` labels and rejects duplicated ones."""
    values = [["1", "2"], ["3", "4"]]
    expected_msg = "Classes render only if `classes` has unique"

    # classes has unique keys
    unique_classes = DataFrame(values, columns=["c", "d"], index=["a", "b"])
    styler.set_td_classes(classes=unique_classes)  # OK

    duplicated_labels = [
        {"columns": ["c", "c"], "index": ["a", "b"]},  # non-unique columns
        {"columns": ["c", "d"], "index": ["a", "a"]},  # non-unique index
    ]
    for labels in duplicated_labels:
        classes = DataFrame(values, **labels)
        with pytest.raises(KeyError, match=expected_msg):
            styler.set_td_classes(classes=classes)
def test_hide_columns_non_unique(styler):
    """Hiding a duplicated column label hides every column carrying it."""
    ctx = styler.hide(["d"], axis="columns")._translate(True, True)
    header_row = ctx["head"][0]
    body_row = ctx["body"][0]

    # (position in the rendered row, column label, expected visibility)
    expectations = ((1, "c", True), (2, "d", False), (3, "d", False))

    for position, label, visible in expectations:
        assert header_row[position]["display_value"] == label
        assert header_row[position]["is_visible"] is visible

    for position, _, visible in expectations:
        assert body_row[position]["is_visible"] is visible
def test_latex_non_unique(styler):
    """``to_latex`` renders a frame with duplicated row and column labels."""
    expected = dedent(
        """\
        \\begin{tabular}{lrrr}
        & c & d & d \\\\
        i & 1.000000 & 2.000000 & 3.000000 \\\\
        j & 4.000000 & 5.000000 & 6.000000 \\\\
        j & 7.000000 & 8.000000 & 9.000000 \\\\
        \\end{tabular}
        """
    )
    latex = styler.to_latex()
    assert latex == expected

View File

@@ -0,0 +1,999 @@
from textwrap import dedent
import numpy as np
import pytest
from pandas import (
DataFrame,
MultiIndex,
option_context,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
from pandas.io.formats.style_render import (
_parse_latex_cell_styles,
_parse_latex_css_conversion,
_parse_latex_header_span,
_parse_latex_table_styles,
_parse_latex_table_wrapping,
)
@pytest.fixture
def df():
    """Two-row frame with an int column ``A``, float column ``B``, string column ``C``."""
    columns = {"A": [0, 1], "B": [-0.61, -1.22], "C": ["ab", "cd"]}
    return DataFrame(columns)
@pytest.fixture
def df_ext():
    """Three-row variant of ``df`` with the same ``A``/``B``/``C`` columns."""
    columns = {
        "A": [0, 1, 2],
        "B": [-0.61, -1.22, -2.22],
        "C": ["ab", "cd", "de"],
    }
    return DataFrame(columns)
@pytest.fixture
def styler(df):
    """Styler over the ``df`` fixture with ``uuid_len=0`` and ``precision=2``."""
    styled = Styler(df, uuid_len=0, precision=2)
    return styled
def test_minimal_latex_tabular(styler):
    """``to_latex`` with no arguments renders a bare ``tabular`` environment."""
    latex = styler.to_latex()
    expected = dedent(
        """\
        \\begin{tabular}{lrrl}
        & A & B & C \\\\
        0 & 0 & -0.61 & ab \\\\
        1 & 1 & -1.22 & cd \\\\
        \\end{tabular}
        """
    )
    assert latex == expected
def test_tabular_hrules(styler):
    """``hrules=True`` adds the top, mid and bottom rule commands."""
    latex = styler.to_latex(hrules=True)
    expected = dedent(
        """\
        \\begin{tabular}{lrrl}
        \\toprule
        & A & B & C \\\\
        \\midrule
        0 & 0 & -0.61 & ab \\\\
        1 & 1 & -1.22 & cd \\\\
        \\bottomrule
        \\end{tabular}
        """
    )
    assert latex == expected
def test_tabular_custom_hrules(styler):
    """Rule commands can be overridden through table styles."""
    # no midrule
    rule_styles = [
        {"selector": "toprule", "props": ":hline"},
        {"selector": "bottomrule", "props": ":otherline"},
    ]
    styler.set_table_styles(rule_styles)
    latex = styler.to_latex()
    expected = dedent(
        """\
        \\begin{tabular}{lrrl}
        \\hline
        & A & B & C \\\\
        0 & 0 & -0.61 & ab \\\\
        1 & 1 & -1.22 & cd \\\\
        \\otherline
        \\end{tabular}
        """
    )
    assert latex == expected
def test_column_format(styler):
    """``column_format`` can come from the keyword argument or from a table style."""
    # default setting is already tested in `test_minimal_latex_tabular`

    # the keyword argument wins over the table style
    styler.set_table_styles([{"selector": "column_format", "props": ":cccc"}])
    latex = styler.to_latex(column_format="rrrr")
    assert "\\begin{tabular}{rrrr}" in latex

    # without the keyword argument the table style is used
    styler.set_table_styles([{"selector": "column_format", "props": ":r|r|cc"}])
    latex = styler.to_latex()
    assert "\\begin{tabular}{r|r|cc}" in latex
def test_siunitx_cols(styler):
    """``siunitx=True`` uses ``S`` columns and brace-wrapped header cells."""
    latex = styler.to_latex(siunitx=True)
    expected = dedent(
        """\
        \\begin{tabular}{lSSl}
        {} & {A} & {B} & {C} \\\\
        0 & 0 & -0.61 & ab \\\\
        1 & 1 & -1.22 & cd \\\\
        \\end{tabular}
        """
    )
    assert latex == expected
def test_position(styler):
    """``position`` wraps the output in a ``table`` environment with that placement."""
    # position given as a keyword argument
    for fragment in ("\\begin{table}[h!]", "\\end{table}"):
        assert fragment in styler.to_latex(position="h!")

    # position given as a table style
    styler.set_table_styles([{"selector": "position", "props": ":b!"}])
    for fragment in ("\\begin{table}[b!]", "\\end{table}"):
        assert fragment in styler.to_latex()
@pytest.mark.parametrize("env", [None, "longtable"])
def test_label(styler, env):
    """``label`` is rendered from the keyword argument or from a table style."""
    from_kwarg = styler.to_latex(label="text", environment=env)
    assert "\n\\label{text}" in from_kwarg

    # the "§" in the table-style value is expected to come out as ":"
    styler.set_table_styles([{"selector": "label", "props": ":{more §text}"}])
    from_table_style = styler.to_latex(environment=env)
    assert "\n\\label{more :text}" in from_table_style
def test_position_float_raises(styler):
    """Invalid ``position_float`` values and the longtable combination raise."""
    bad_value_msg = (
        "`position_float` should be one of 'raggedright', 'raggedleft', 'centering',"
    )
    with pytest.raises(ValueError, match=bad_value_msg):
        styler.to_latex(position_float="bad_string")

    longtable_msg = "`position_float` cannot be used in 'longtable' `environment`"
    with pytest.raises(ValueError, match=longtable_msg):
        styler.to_latex(position_float="centering", environment="longtable")
@pytest.mark.parametrize("label", [(None, ""), ("text", "\\label{text}")])
@pytest.mark.parametrize("position", [(None, ""), ("h!", "{table}[h!]")])
@pytest.mark.parametrize("caption", [(None, ""), ("text", "\\caption{text}")])
@pytest.mark.parametrize("column_format", [(None, ""), ("rcrl", "{tabular}{rcrl}")])
@pytest.mark.parametrize("position_float", [(None, ""), ("centering", "\\centering")])
def test_kwargs_combinations(
    styler, label, position, caption, column_format, position_float
):
    """Every combination of these ``to_latex`` keywords produces all expected fragments.

    Each parameter is an ``(argument value, expected output fragment)`` pair.
    """
    options = {
        "label": label,
        "position": position,
        "caption": caption,
        "column_format": column_format,
        "position_float": position_float,
    }
    kwargs = {name: pair[0] for name, pair in options.items()}
    result = styler.to_latex(**kwargs)
    for _, fragment in options.values():
        assert fragment in result
def test_custom_table_styles(styler):
    """Unrecognised table-style selectors are emitted as commands inside ``table``."""
    custom_commands = [
        {"selector": "mycommand", "props": ":{myoptions}"},
        {"selector": "mycommand2", "props": ":{myoptions2}"},
    ]
    styler.set_table_styles(custom_commands)
    latex = styler.to_latex()
    expected = dedent(
        """\
        \\begin{table}
        \\mycommand{myoptions}
        \\mycommand2{myoptions2}
        """
    )
    assert expected in latex
def test_cell_styling(styler):
    """Cell props render as LaTeX commands, honouring the ``--wrap`` flag."""
    styler.highlight_max(props="itshape:;Huge:--wrap;")
    latex = styler.to_latex()
    expected = dedent(
        """\
        \\begin{tabular}{lrrl}
        & A & B & C \\\\
        0 & 0 & \\itshape {\\Huge -0.61} & ab \\\\
        1 & \\itshape {\\Huge 1} & -1.22 & \\itshape {\\Huge cd} \\\\
        \\end{tabular}
        """
    )
    assert expected == latex
def test_multiindex_columns(df):
    """MultiIndex column headers render sparsified by default and repeated otherwise."""
    df.columns = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])

    # sparse (default)
    expected_sparse = dedent(
        """\
        \\begin{tabular}{lrrl}
        & \\multicolumn{2}{r}{A} & B \\\\
        & a & b & c \\\\
        0 & 0 & -0.61 & ab \\\\
        1 & 1 & -1.22 & cd \\\\
        \\end{tabular}
        """
    )
    s = df.style.format(precision=2)
    assert expected_sparse == s.to_latex()

    # non-sparse
    expected_dense = dedent(
        """\
        \\begin{tabular}{lrrl}
        & A & A & B \\\\
        & a & b & c \\\\
        0 & 0 & -0.61 & ab \\\\
        1 & 1 & -1.22 & cd \\\\
        \\end{tabular}
        """
    )
    s = df.style.format(precision=2)
    assert expected_dense == s.to_latex(sparse_columns=False)
def test_multiindex_row(df_ext):
    """MultiIndex row labels render with ``multirow`` by default and repeated otherwise."""
    df_ext.index = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])

    # sparse (default)
    expected_sparse = dedent(
        """\
        \\begin{tabular}{llrrl}
        & & A & B & C \\\\
        \\multirow[c]{2}{*}{A} & a & 0 & -0.61 & ab \\\\
        & b & 1 & -1.22 & cd \\\\
        B & c & 2 & -2.22 & de \\\\
        \\end{tabular}
        """
    )
    styler = df_ext.style.format(precision=2)
    sparse_latex = styler.to_latex()
    assert expected_sparse == sparse_latex

    # non-sparse
    expected_dense = dedent(
        """\
        \\begin{tabular}{llrrl}
        & & A & B & C \\\\
        A & a & 0 & -0.61 & ab \\\\
        A & b & 1 & -1.22 & cd \\\\
        B & c & 2 & -2.22 & de \\\\
        \\end{tabular}
        """
    )
    dense_latex = styler.to_latex(sparse_index=False)
    assert expected_dense == dense_latex
def test_multirow_naive(df_ext):
    """``multirow_align="naive"`` renders sparse row labels without ``multirow``."""
    df_ext.index = MultiIndex.from_tuples([("X", "x"), ("X", "y"), ("Y", "z")])
    styler = df_ext.style.format(precision=2)
    latex = styler.to_latex(multirow_align="naive")
    expected = dedent(
        """\
        \\begin{tabular}{llrrl}
        & & A & B & C \\\\
        X & x & 0 & -0.61 & ab \\\\
        & y & 1 & -1.22 & cd \\\\
        Y & z & 2 & -2.22 & de \\\\
        \\end{tabular}
        """
    )
    assert expected == latex
def test_multiindex_row_and_col(df_ext):
    """MultiIndex on both axes honours the align kwargs and the sparse flags."""
    col_index = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")])
    row_index = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
    df_ext.index = row_index
    df_ext.columns = col_index

    # sparse, with explicit alignments
    expected_sparse = dedent(
        """\
        \\begin{tabular}{llrrl}
        & & \\multicolumn{2}{l}{Z} & Y \\\\
        & & a & b & c \\\\
        \\multirow[b]{2}{*}{A} & a & 0 & -0.61 & ab \\\\
        & b & 1 & -1.22 & cd \\\\
        B & c & 2 & -2.22 & de \\\\
        \\end{tabular}
        """
    )
    styler = df_ext.style.format(precision=2)
    sparse_latex = styler.to_latex(multirow_align="b", multicol_align="l")
    assert sparse_latex == expected_sparse

    # non-sparse
    expected_dense = dedent(
        """\
        \\begin{tabular}{llrrl}
        & & Z & Z & Y \\\\
        & & a & b & c \\\\
        A & a & 0 & -0.61 & ab \\\\
        A & b & 1 & -1.22 & cd \\\\
        B & c & 2 & -2.22 & de \\\\
        \\end{tabular}
        """
    )
    dense_latex = styler.to_latex(sparse_index=False, sparse_columns=False)
    assert dense_latex == expected_dense
@pytest.mark.parametrize(
    "multicol_align, siunitx, header",
    [
        ("naive-l", False, " & A & &"),
        ("naive-r", False, " & & & A"),
        ("naive-l", True, "{} & {A} & {} & {}"),
        ("naive-r", True, "{} & {} & {} & {A}"),
    ],
)
def test_multicol_naive(df, multicol_align, siunitx, header):
    """Naive multicol alignments place the label in one cell, without ``multicolumn``."""
    col_index = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("A", "c")])
    df.columns = col_index

    # siunitx changes both the second header row and the column format
    if siunitx:
        level1 = "{} & {a} & {b} & {c}"
        col_format = "lSSl"
    else:
        level1 = " & a & b & c"
        col_format = "lrrl"

    expected = dedent(
        f"""\
        \\begin{{tabular}}{{{col_format}}}
        {header} \\\\
        {level1} \\\\
        0 & 0 & -0.61 & ab \\\\
        1 & 1 & -1.22 & cd \\\\
        \\end{{tabular}}
        """
    )
    styler = df.style.format(precision=2)
    latex = styler.to_latex(multicol_align=multicol_align, siunitx=siunitx)
    assert expected == latex
def test_multi_options(df_ext):
    """The ``styler.latex.multicol_align`` / ``multirow_align`` options are honoured."""
    col_index = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")])
    row_index = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
    df_ext.index = row_index
    df_ext.columns = col_index
    styler = df_ext.style.format(precision=2)

    # rendering without any option override
    expected_default = dedent(
        """\
        & & \\multicolumn{2}{r}{Z} & Y \\\\
        & & a & b & c \\\\
        \\multirow[c]{2}{*}{A} & a & 0 & -0.61 & ab \\\\
        """
    )
    default_latex = styler.to_latex()
    assert expected_default in default_latex

    with option_context("styler.latex.multicol_align", "l"):
        multicol_line = " & & \\multicolumn{2}{l}{Z} & Y \\\\"
        assert multicol_line in styler.to_latex()

    with option_context("styler.latex.multirow_align", "b"):
        multirow_line = "\\multirow[b]{2}{*}{A} & a & 0 & -0.61 & ab \\\\"
        assert multirow_line in styler.to_latex()
def test_multiindex_columns_hidden():
    """Hiding one MultiIndex column drops one entry from the column format."""
    frame = DataFrame([[1, 2, 3, 4]])
    frame.columns = MultiIndex.from_tuples(
        [("A", 1), ("A", 2), ("A", 3), ("B", 1)]
    )
    styler = frame.style
    before = styler.to_latex()
    assert "{tabular}{lrrrr}" in before
    styler.set_table_styles([])  # reset the position command
    styler.hide([("A", 2)], axis="columns")
    after = styler.to_latex()
    assert "{tabular}{lrrr}" in after
@pytest.mark.parametrize(
    "option, value",
    [
        ("styler.sparse.index", True),
        ("styler.sparse.index", False),
        ("styler.sparse.columns", True),
        ("styler.sparse.columns", False),
    ],
)
def test_sparse_options(df_ext, option, value):
    """Output matches the un-overridden render exactly when the option is True."""
    row_labels = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
    col_labels = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")])
    df_ext.index, df_ext.columns = row_labels, col_labels
    styler = df_ext.style

    baseline = styler.to_latex()
    with option_context(option, value):
        with_option = styler.to_latex()
    unchanged = baseline == with_option
    assert unchanged is value
def test_hidden_index(styler):
    """With the index hidden, no index column is rendered."""
    styler.hide(axis="index")
    result = styler.to_latex()
    expected = dedent(
        """\
        \\begin{tabular}{rrl}
        A & B & C \\\\
        0 & -0.61 & ab \\\\
        1 & -1.22 & cd \\\\
        \\end{tabular}
        """
    )
    assert result == expected
@pytest.mark.parametrize("environment", ["table", "figure*", None])
def test_comprehensive(df_ext, environment):
    """Exercise as many low-level features simultaneously as possible."""
    row_labels = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
    col_labels = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")])
    df_ext.index, df_ext.columns = row_labels, col_labels

    table_styles = [
        {"selector": "label", "props": ":{fig§item}"},
        {"selector": "position", "props": ":h!"},
        {"selector": "position_float", "props": ":centering"},
        {"selector": "column_format", "props": ":rlrlr"},
        {"selector": "toprule", "props": ":toprule"},
        {"selector": "midrule", "props": ":midrule"},
        {"selector": "bottomrule", "props": ":bottomrule"},
        {"selector": "rowcolors", "props": ":{3}{pink}{}"},  # custom command
    ]
    stlr = df_ext.style
    stlr.set_caption("mycap")
    stlr.set_table_styles(table_styles)
    stlr.highlight_max(axis=0, props="textbf:--rwrap;cellcolor:[rgb]{1,1,0.6}--rwrap")
    stlr.highlight_max(axis=None, props="Huge:--wrap;", subset=[("Z", "a"), ("Z", "b")])

    # The template is written for the "table" environment; every occurrence of
    # "table" is then swapped for the requested environment name.
    env_name = environment if environment else "table"
    template = (
        """\
\\begin{table}[h!]
\\centering
\\caption{mycap}
\\label{fig:item}
\\rowcolors{3}{pink}{}
\\begin{tabular}{rlrlr}
\\toprule
& & \\multicolumn{2}{r}{Z} & Y \\\\
& & a & b & c \\\\
\\midrule
\\multirow[c]{2}{*}{A} & a & 0 & \\textbf{\\cellcolor[rgb]{1,1,0.6}{-0.61}} & ab \\\\
& b & 1 & -1.22 & cd \\\\
B & c & \\textbf{\\cellcolor[rgb]{1,1,0.6}{{\\Huge 2}}} & -2.22 & """
        """\
\\textbf{\\cellcolor[rgb]{1,1,0.6}{de}} \\\\
\\bottomrule
\\end{tabular}
\\end{table}
"""
    )
    expected = template.replace("table", env_name)
    result = stlr.format(precision=2).to_latex(environment=environment)
    assert result == expected
def test_environment_option(styler):
    """The environment option is used, and an explicit argument overrides it."""
    with option_context("styler.latex.environment", "bar-env"):
        from_option = styler.to_latex()
        assert "\\begin{bar-env}" in from_option
        from_argument = styler.to_latex(environment="foo-env")
        assert "\\begin{foo-env}" in from_argument
def test_parse_latex_table_styles(styler):
    """The last matching selector wins and only its first prop value is used."""
    table_styles = [
        {"selector": "foo", "props": [("attr", "value")]},
        {"selector": "bar", "props": [("attr", "overwritten")]},
        {"selector": "bar", "props": [("attr", "baz"), ("attr2", "ignored")]},
        {"selector": "label", "props": [("", "{fig§item}")]},
    ]
    styler.set_table_styles(table_styles)

    bar_value = _parse_latex_table_styles(styler.table_styles, "bar")
    assert bar_value == "baz"

    # test '§' replaced by ':' [for CSS compatibility]
    label_value = _parse_latex_table_styles(styler.table_styles, "label")
    assert label_value == "{fig:item}"
def test_parse_latex_cell_styles_basic():
    """Two ``--rwrap`` commands nest around the display value."""
    styles = [("itshape", "--rwrap"), ("cellcolor", "[rgb]{0,1,1}--rwrap")]
    result = _parse_latex_cell_styles(styles, "text")
    assert result == "\\itshape{\\cellcolor[rgb]{0,1,1}{text}}"
@pytest.mark.parametrize(
    "wrap_arg, expected",
    [  # test wrapping
        ("", "\\<command><options> <display_value>"),
        ("--wrap", "{\\<command><options> <display_value>}"),
        ("--nowrap", "\\<command><options> <display_value>"),
        ("--lwrap", "{\\<command><options>} <display_value>"),
        ("--dwrap", "{\\<command><options>}{<display_value>}"),
        ("--rwrap", "\\<command><options>{<display_value>}"),
    ],
)
def test_parse_latex_cell_styles_braces(wrap_arg, expected):
    """Each wrap flag places the braces as given in the parametrized table."""
    options = f"<options>{wrap_arg}"
    result = _parse_latex_cell_styles([("<command>", options)], "<display_value>")
    assert result == expected
def test_parse_latex_header_span():
    """Header cells render as multicolumn, multirow, plain or styled text."""
    cases = [
        (
            {"attributes": 'colspan="3"', "display_value": "text", "cellstyle": []},
            "\\multicolumn{3}{Y}{text}",
        ),
        (
            {"attributes": 'rowspan="5"', "display_value": "text", "cellstyle": []},
            "\\multirow[X]{5}{*}{text}",
        ),
        ({"display_value": "text", "cellstyle": []}, "text"),
        (
            {"display_value": "text", "cellstyle": [("bfseries", "--rwrap")]},
            "\\bfseries{text}",
        ),
    ]
    for cell, expected in cases:
        assert _parse_latex_header_span(cell, "X", "Y") == expected
def test_parse_latex_table_wrapping(styler):
    """Wrapping is requested by a caption or by a selector outside the rule set."""
    rule_selectors = ["toprule", "bottomrule", "midrule", "column_format"]
    styler.set_table_styles(
        [{"selector": name, "props": ":value"} for name in rule_selectors]
    )
    # These four selectors alone do not trigger wrapping; a caption does.
    assert _parse_latex_table_wrapping(styler.table_styles, styler.caption) is False
    assert _parse_latex_table_wrapping(styler.table_styles, "some caption") is True

    # Appending any other selector triggers wrapping even with no caption.
    styler.set_table_styles(
        [{"selector": "not-ignored", "props": ":value"}],
        overwrite=False,
    )
    assert _parse_latex_table_wrapping(styler.table_styles, None) is True
def test_short_caption(styler):
    """A (full, short) caption tuple renders the short caption in brackets."""
    caption = ("full cap", "short cap")
    rendered = styler.to_latex(caption=caption)
    assert "\\caption[short cap]{full cap}" in rendered
@pytest.mark.parametrize(
    "css, expected",
    [
        ([("color", "red")], [("color", "{red}")]),  # test color and input format types
        (
            [("color", "rgb(128, 128, 128 )")],
            [("color", "[rgb]{0.502, 0.502, 0.502}")],
        ),
        (
            [("color", "rgb(128, 50%, 25% )")],
            [("color", "[rgb]{0.502, 0.500, 0.250}")],
        ),
        (
            [("color", "rgba(128,128,128,1)")],
            [("color", "[rgb]{0.502, 0.502, 0.502}")],
        ),
        ([("color", "#FF00FF")], [("color", "[HTML]{FF00FF}")]),
        ([("color", "#F0F")], [("color", "[HTML]{FF00FF}")]),
        ([("font-weight", "bold")], [("bfseries", "")]),  # test font-weight and types
        ([("font-weight", "bolder")], [("bfseries", "")]),
        ([("font-weight", "normal")], []),
        ([("background-color", "red")], [("cellcolor", "{red}--lwrap")]),
        (
            [("background-color", "#FF00FF")],  # test background-color command and wrap
            [("cellcolor", "[HTML]{FF00FF}--lwrap")],
        ),
        ([("font-style", "italic")], [("itshape", "")]),  # test font-style and types
        ([("font-style", "oblique")], [("slshape", "")]),
        ([("font-style", "normal")], []),
        ([("color", "red /*--dwrap*/")], [("color", "{red}--dwrap")]),  # css comments
        ([("background-color", "red /* --dwrap */")], [("cellcolor", "{red}--dwrap")]),
    ],
)
def test_parse_latex_css_conversion(css, expected):
    """Each CSS (attribute, value) list converts to the listed LaTeX commands."""
    converted = _parse_latex_css_conversion(css)
    assert converted == expected
@pytest.mark.parametrize(
    "env, inner_env",
    [
        (None, "tabular"),
        ("table", "tabular"),
        ("longtable", "longtable"),
    ],
)
@pytest.mark.parametrize(
    "convert, exp", [(True, "bfseries"), (False, "font-weightbold")]
)
def test_parse_latex_css_convert_minimal(styler, env, inner_env, convert, exp):
    """``convert_css`` decides whether the CSS prop is translated or emitted raw."""
    # parameters ensure longtable template is also tested
    styler.highlight_max(props="font-weight:bold;")
    rendered = styler.to_latex(convert_css=convert, environment=env)
    table_tail = dedent(
        f"""\
        0 & 0 & \\{exp} -0.61 & ab \\\\
        1 & \\{exp} 1 & -1.22 & \\{exp} cd \\\\
        \\end{{{inner_env}}}
        """
    )
    assert table_tail in rendered
def test_parse_latex_css_conversion_option():
    """The ``--latex`` marker is stripped while the wrap flag is kept."""
    converted = _parse_latex_css_conversion([("command", "option--latex--wrap")])
    assert converted == [("command", "option--wrap")]
def test_styler_object_after_render(styler):
    """Rendering with keyword arguments must not mutate the Styler (GH 42320)."""
    snapshot = styler._copy(deepcopy=True)
    render_kwargs = {
        "column_format": "rllr",
        "position": "h",
        "position_float": "centering",
        "hrules": True,
        "label": "my lab",
        "caption": "my cap",
    }
    styler.to_latex(**render_kwargs)

    assert snapshot.table_styles == styler.table_styles
    assert snapshot.caption == styler.caption
def test_longtable_comprehensive(styler):
    """Full longtable output with rules, a label and a (full, short) caption."""
    expected = dedent(
        """\
        \\begin{longtable}{lrrl}
        \\caption[short]{full} \\label{fig:A} \\\\
        \\toprule
        & A & B & C \\\\
        \\midrule
        \\endfirsthead
        \\caption[]{full} \\\\
        \\toprule
        & A & B & C \\\\
        \\midrule
        \\endhead
        \\midrule
        \\multicolumn{4}{r}{Continued on next page} \\\\
        \\midrule
        \\endfoot
        \\bottomrule
        \\endlastfoot
        0 & 0 & -0.61 & ab \\\\
        1 & 1 & -1.22 & cd \\\\
        \\end{longtable}
        """
    )
    rendered = styler.to_latex(
        environment="longtable", hrules=True, label="fig:A", caption=("full", "short")
    )
    assert rendered == expected
def test_longtable_minimal(styler):
    """Longtable output when only the environment is specified."""
    expected = dedent(
        """\
        \\begin{longtable}{lrrl}
        & A & B & C \\\\
        \\endfirsthead
        & A & B & C \\\\
        \\endhead
        \\multicolumn{4}{r}{Continued on next page} \\\\
        \\endfoot
        \\endlastfoot
        0 & 0 & -0.61 & ab \\\\
        1 & 1 & -1.22 & cd \\\\
        \\end{longtable}
        """
    )
    rendered = styler.to_latex(environment="longtable")
    assert rendered == expected
@pytest.mark.parametrize(
    "sparse, exp, siunitx",
    [
        (True, "{} & \\multicolumn{2}{r}{A} & {B}", True),
        (False, "{} & {A} & {A} & {B}", True),
        (True, " & \\multicolumn{2}{r}{A} & B", False),
        (False, " & A & A & B", False),
    ],
)
def test_longtable_multiindex_columns(df, sparse, exp, siunitx):
    """Both longtable header sections repeat the MultiIndex column rows."""
    df.columns = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
    # Second header level and numeric column spec for the two siunitx modes.
    if siunitx:
        level1, numeric_cols = "{} & {a} & {b} & {c} \\\\", "SS"
    else:
        level1, numeric_cols = " & a & b & c \\\\", "rr"
    expected = dedent(
        f"""\
        \\begin{{longtable}}{{l{numeric_cols}l}}
        {exp} \\\\
        {level1}
        \\endfirsthead
        {exp} \\\\
        {level1}
        \\endhead
        """
    )
    rendered = df.style.to_latex(
        environment="longtable", sparse_columns=sparse, siunitx=siunitx
    )
    assert expected in rendered
@pytest.mark.parametrize(
    "caption, cap_exp",
    [
        ("full", ("{full}", "")),
        (("full", "short"), ("{full}", "[short]")),
    ],
)
@pytest.mark.parametrize("label, lab_exp", [(None, ""), ("tab:A", " \\label{tab:A}")])
def test_longtable_caption_label(styler, caption, cap_exp, label, lab_exp):
    """Caption and label appear in the first head; the repeat head has no label."""
    full_part, short_part = cap_exp
    first_head_caption = f"\\caption{short_part}{full_part}"
    repeat_head_caption = f"\\caption[]{full_part}"
    expected = dedent(
        f"""\
        {first_head_caption}{lab_exp} \\\\
        & A & B & C \\\\
        \\endfirsthead
        {repeat_head_caption} \\\\
        """
    )
    rendered = styler.to_latex(
        environment="longtable", caption=caption, label=label
    )
    assert expected in rendered
@pytest.mark.parametrize("index", [True, False])
@pytest.mark.parametrize(
    "columns, siunitx",
    [
        (True, True),
        (True, False),
        (False, False),
    ],
)
def test_apply_map_header_render_mi(df_ext, index, columns, siunitx):
    """Header styles from ``applymap_index`` render on the requested axes only."""
    row_labels = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
    col_labels = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")])
    df_ext.index, df_ext.columns = row_labels, col_labels
    styler = df_ext.style

    def _bold_selected(v):
        # Style only labels containing "A", "Z" or "c".
        if "A" in v or "Z" in v or "c" in v:
            return "bfseries: --rwrap"
        return None

    for axis, enabled in (("index", index), ("columns", columns)):
        if enabled:
            styler.applymap_index(_bold_selected, axis=axis)

    result = styler.to_latex(siunitx=siunitx)

    expected_index = dedent(
        """\
        \\multirow[c]{2}{*}{\\bfseries{A}} & a & 0 & -0.610000 & ab \\\\
        \\bfseries{} & b & 1 & -1.220000 & cd \\\\
        B & \\bfseries{c} & 2 & -2.220000 & de \\\\
        """
    )
    assert (expected_index in result) is index

    if siunitx:
        expected_columns = dedent(
            """\
            {} & {} & \\multicolumn{2}{r}{\\bfseries{Z}} & {Y} \\\\
            {} & {} & {a} & {b} & {\\bfseries{c}} \\\\
            """
        )
    else:
        # Not dedented in the original, so the literal stays at column 0.
        expected_columns = """\
& & \\multicolumn{2}{r}{\\bfseries{Z}} & Y \\\\
& & a & b & \\bfseries{c} \\\\
"""
    assert (expected_columns in result) is columns
def test_repr_option(styler):
    """``styler.render.repr`` selects which rich repr returns content."""
    # Without an override the HTML repr is active and the LaTeX repr is None.
    html_head = styler._repr_html_()[:6]
    assert "<style" in html_head
    assert styler._repr_latex_() is None

    with option_context("styler.render.repr", "latex"):
        latex_head = styler._repr_latex_()[:15]
        assert "\\begin{tabular}" in latex_head
        assert styler._repr_html_() is None
@pytest.mark.parametrize("option", ["hrules"])
def test_bool_options(styler, option):
    """Toggling a boolean ``styler.latex`` option changes the rendered output."""
    option_name = f"styler.latex.{option}"
    rendered = {}
    for flag in (False, True):
        with option_context(option_name, flag):
            rendered[flag] = styler.to_latex()
    # options are reactive under to_latex(*no_args)
    assert rendered[False] != rendered[True]
def test_siunitx_basic_headers(styler):
    """Header labels are brace-wrapped only when ``siunitx=True``."""
    with_siunitx = styler.to_latex(siunitx=True)
    assert "{} & {A} & {B} & {C} \\\\" in with_siunitx
    without_siunitx = styler.to_latex()  # default siunitx=False
    assert " & A & B & C \\\\" in without_siunitx
@pytest.mark.parametrize("axis", ["index", "columns"])
def test_css_convert_apply_index(styler, axis):
    """Bold CSS applied to header labels is converted to ``\\bfseries``.

    Parameters
    ----------
    styler : Styler
        Fixture supplying the Styler under test.
    axis : {"index", "columns"}
        Axis whose labels are styled and then checked.
    """
    styler.applymap_index(lambda x: "font-weight: bold;", axis=axis)
    # Render once: the call takes the same arguments for every label, so
    # repeating it inside the loop only re-did identical work.
    rendered = styler.to_latex(convert_css=True)
    for label in getattr(styler, axis):
        assert f"\\bfseries {label}" in rendered
def test_hide_index_latex(styler):
    """A hidden index row is omitted from the output (GH 43637)."""
    styler.hide([0], axis=0)
    expected = dedent(
        """\
        \\begin{tabular}{lrrl}
        & A & B & C \\\\
        1 & 1 & -1.22 & cd \\\\
        \\end{tabular}
        """
    )
    rendered = styler.to_latex()
    assert expected == rendered
def test_latex_hiding_index_columns_multiindex_alignment():
    """Hidden levels, rows and columns stay aligned in the output (gh 43644)."""
    row_index = MultiIndex.from_product(
        [["i0", "j0"], ["i1"], ["i2", "j2"]], names=["i-0", "i-1", "i-2"]
    )
    col_index = MultiIndex.from_product(
        [["c0"], ["c1", "d1"], ["c2", "d2"]], names=["c-0", "c-1", "c-2"]
    )
    values = np.arange(16).reshape(4, 4)
    styler = Styler(DataFrame(values, index=row_index, columns=col_index), uuid_len=0)

    # Hide one level per axis, then one specific row and one specific column.
    styler.hide(level=1, axis=0).hide(level=0, axis=1)
    styler.hide([("i0", "i1", "i2")], axis=0)
    styler.hide([("c0", "c1", "c2")], axis=1)
    styler.applymap(lambda x: "color:{red};" if x == 5 else "")
    styler.applymap_index(lambda x: "color:{blue};" if "j" in x else "")

    expected = dedent(
        """\
        \\begin{tabular}{llrrr}
        & c-1 & c1 & \\multicolumn{2}{r}{d1} \\\\
        & c-2 & d2 & c2 & d2 \\\\
        i-0 & i-2 & & & \\\\
        i0 & \\color{blue} j2 & \\color{red} 5 & 6 & 7 \\\\
        \\multirow[c]{2}{*}{\\color{blue} j0} & i2 & 9 & 10 & 11 \\\\
        \\color{blue} & \\color{blue} j2 & 13 & 14 & 15 \\\\
        \\end{tabular}
        """
    )
    assert styler.to_latex() == expected
def test_rendered_links():
    """``hyperlinks="latex"`` wraps a detected URL in an href command."""
    # note the majority of testing is done in test_html.py: test_rendered_links
    # these test only the alternative latex format is functional
    frame = DataFrame(["text www.domain.com text"])
    rendered = frame.style.format(hyperlinks="latex").to_latex()
    expected_fragment = r"text \href{www.domain.com}{www.domain.com} text"
    assert expected_fragment in rendered
def test_apply_index_hidden_levels():
    """Column header styling still applies with an index level hidden (gh 45156)."""
    frame = DataFrame(
        [[1]],
        index=MultiIndex.from_tuples([(0, 1)], names=["l0", "l1"]),
        columns=MultiIndex.from_tuples([(0, 1)], names=["c0", "c1"]),
    )
    styler = frame.style
    styler.hide(level=1)
    styler.applymap_index(lambda v: "color: red;", level=0, axis=1)

    expected = dedent(
        """\
        \\begin{tabular}{lr}
        c0 & \\color{red} 0 \\\\
        c1 & 1 \\\\
        l0 & \\\\
        0 & 1 \\\\
        \\end{tabular}
        """
    )
    rendered = styler.to_latex(convert_css=True)
    assert rendered == expected
@pytest.mark.parametrize("clines", ["bad", "index", "skip-last", "all", "data"])
def test_clines_validation(clines, styler):
    """Each of these ``clines`` values is rejected with a ValueError."""
    expected_message = f"`clines` value of {clines} is invalid."
    with pytest.raises(ValueError, match=expected_message):
        styler.to_latex(clines=clines)
@pytest.mark.parametrize(
    "clines, exp",
    [
        ("all;index", "\n\\cline{1-1}"),
        ("all;data", "\n\\cline{1-2}"),
        ("skip-last;index", ""),
        ("skip-last;data", ""),
        (None, ""),
    ],
)
@pytest.mark.parametrize("env", ["table", "longtable"])
def test_clines_index(clines, exp, env):
    """Each data row is followed by the expected cline suffix, if any."""
    frame = DataFrame([[1], [2], [3], [4]])
    rendered = frame.style.to_latex(clines=clines, environment=env)
    # Not dedented in the original, so the literal stays at column 0.
    body = f"""\
0 & 1 \\\\{exp}
1 & 2 \\\\{exp}
2 & 3 \\\\{exp}
3 & 4 \\\\{exp}
"""
    assert body in rendered
@pytest.mark.parametrize(
    "clines, expected",
    [
        (
            None,
            dedent(
                """\
                \\multirow[c]{2}{*}{A} & X & 1 \\\\
                & Y & 2 \\\\
                \\multirow[c]{2}{*}{B} & X & 3 \\\\
                & Y & 4 \\\\
                """
            ),
        ),
        (
            "skip-last;index",
            dedent(
                """\
                \\multirow[c]{2}{*}{A} & X & 1 \\\\
                & Y & 2 \\\\
                \\cline{1-2}
                \\multirow[c]{2}{*}{B} & X & 3 \\\\
                & Y & 4 \\\\
                \\cline{1-2}
                """
            ),
        ),
        (
            "skip-last;data",
            dedent(
                """\
                \\multirow[c]{2}{*}{A} & X & 1 \\\\
                & Y & 2 \\\\
                \\cline{1-3}
                \\multirow[c]{2}{*}{B} & X & 3 \\\\
                & Y & 4 \\\\
                \\cline{1-3}
                """
            ),
        ),
        (
            "all;index",
            dedent(
                """\
                \\multirow[c]{2}{*}{A} & X & 1 \\\\
                \\cline{2-2}
                & Y & 2 \\\\
                \\cline{1-2} \\cline{2-2}
                \\multirow[c]{2}{*}{B} & X & 3 \\\\
                \\cline{2-2}
                & Y & 4 \\\\
                \\cline{1-2} \\cline{2-2}
                """
            ),
        ),
        (
            "all;data",
            dedent(
                """\
                \\multirow[c]{2}{*}{A} & X & 1 \\\\
                \\cline{2-3}
                & Y & 2 \\\\
                \\cline{1-3} \\cline{2-3}
                \\multirow[c]{2}{*}{B} & X & 3 \\\\
                \\cline{2-3}
                & Y & 4 \\\\
                \\cline{1-3} \\cline{2-3}
                """
            ),
        ),
    ],
)
@pytest.mark.parametrize("env", ["table"])
def test_clines_multiindex(clines, expected, env):
    """Cline output for a MultiIndex with hidden rows and a hidden level."""
    # also tests simultaneously with hidden rows and a hidden multiindex level
    row_index = MultiIndex.from_product([["A", "-", "B"], [0], ["X", "Y"]])
    frame = DataFrame([[1], [2], [99], [99], [3], [4]], index=row_index)
    styler = frame.style
    styler.hide([("-", 0, "X"), ("-", 0, "Y")])
    styler.hide(level=1)
    rendered = styler.to_latex(clines=clines, environment=env)
    assert expected in rendered
def test_col_format_len(styler):
    """The continuation row spans 4 columns for this column_format (gh 46037)."""
    rendered = styler.to_latex(environment="longtable", column_format="lrr{10cm}")
    continuation_row = r"\multicolumn{4}{r}{Continued on next page} \\"
    assert continuation_row in rendered

View File

@@ -0,0 +1,85 @@
import numpy as np
import pytest
from pandas import DataFrame
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
@pytest.fixture
def df():
    """3x3 integer frame with columns A-C and index x-z."""
    values = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    return DataFrame(
        data=values,
        columns=["A", "B", "C"],
        index=["x", "y", "z"],
    )
@pytest.fixture
def styler(df):
    """Styler over the ``df`` fixture, constructed with ``uuid_len=0``."""
    wrapped = Styler(df, uuid_len=0)
    return wrapped
@pytest.mark.parametrize(
    "ttips",
    [
        DataFrame(  # Test basic reindex and ignoring blank
            data=[["Min", "Max"], [np.nan, ""]],
            columns=["A", "C"],
            index=["x", "y"],
        ),
        DataFrame(  # Test non-referenced columns, reversed col names, short index
            data=[["Max", "Min", "Bad-Col"]], columns=["C", "A", "D"], index=["x"]
        ),
    ],
)
def test_tooltip_render(ttips, styler):
    """Tooltips render for valid cells only (GH 21266)."""
    result = styler.set_tooltips(ttips).to_html()

    present = [
        # test tooltip table level class
        "#T_ .pd-t {\n visibility: hidden;\n",
        # test 'Min' tooltip added
        "#T_ #T__row0_col0:hover .pd-t {\n visibility: visible;\n}",
        '#T_ #T__row0_col0 .pd-t::after {\n content: "Min";\n}',
        'class="data row0 col0" >0<span class="pd-t"></span></td>',
        # test 'Max' tooltip added
        "#T_ #T__row0_col2:hover .pd-t {\n visibility: visible;\n}",
        '#T_ #T__row0_col2 .pd-t::after {\n content: "Max";\n}',
        'class="data row0 col2" >2<span class="pd-t"></span></td>',
    ]
    for fragment in present:
        assert fragment in result

    # test Nan, empty string and bad column ignored
    absent = [
        "#T_ #T__row1_col0:hover .pd-t {\n visibility: visible;\n}",
        "#T_ #T__row1_col1:hover .pd-t {\n visibility: visible;\n}",
        "#T_ #T__row0_col1:hover .pd-t {\n visibility: visible;\n}",
        "#T_ #T__row1_col2:hover .pd-t {\n visibility: visible;\n}",
        "Bad-Col",
    ]
    for fragment in absent:
        assert fragment not in result
def test_tooltip_ignored(styler):
    """Without ``set_tooltips`` no tooltip span or CSS is emitted (GH 21266)."""
    html = styler.to_html()  # no set_tooltips() creates no <span>
    empty_style_block = '<style type="text/css">\n</style>'
    tooltip_span = '<span class="pd-t"></span>'
    assert empty_style_block in html
    assert tooltip_span not in html
def test_tooltip_css_class(styler):
    """A custom tooltip class and props are used in the generated CSS."""
    # GH 21266
    first = styler.set_tooltips(
        DataFrame([["tooltip"]], index=["x"], columns=["A"]),
        css_class="other-class",
        props=[("color", "green")],
    ).to_html()
    assert "#T_ .other-class {\n color: green;\n" in first
    assert '#T_ #T__row0_col0 .other-class::after {\n content: "tooltip";\n' in first

    # GH 39563
    second = styler.set_tooltips(  # set_tooltips overwrites previous
        DataFrame([["tooltip"]], index=["x"], columns=["A"]),
        css_class="another-class",
        props="color:green;color:red;",
    ).to_html()
    assert "#T_ .another-class {\n color: green;\n color: red;\n}" in second

View File

@@ -0,0 +1,72 @@
import locale
import pytest
from pandas._config import detect_console_encoding
class MockEncoding:
    """
    Stand-in for a stream whose 'encoding' property has a side effect.

    A str side effect is returned as the encoding value; anything else is
    treated as an exception and raised when the property is accessed.
    """

    def __init__(self, encoding):
        super().__init__()
        self.val = encoding

    @property
    def encoding(self):
        return self.raise_or_return(self.val)

    @staticmethod
    def raise_or_return(val):
        # Guard clause: non-str values are exceptions to raise.
        if not isinstance(val, str):
            raise val
        return val
@pytest.mark.parametrize("empty,filled", [["stdin", "stdout"], ["stdout", "stdin"]])
def test_detect_console_encoding_from_stdout_stdin(monkeypatch, empty, filled):
    """The encoding comes from whichever of stdout/stdin has one set (GH 21552)."""
    # One stream reports an empty encoding, the other reports its own name.
    with monkeypatch.context() as patcher:
        patcher.setattr(f"sys.{empty}", MockEncoding(""))
        patcher.setattr(f"sys.{filled}", MockEncoding(filled))
        detected = detect_console_encoding()
        assert detected == filled
@pytest.mark.parametrize("encoding", [AttributeError, OSError, "ascii"])
def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding):
    """The locale's preferred encoding is used for these stdout cases (GH 21552)."""
    with monkeypatch.context() as patcher:
        patcher.setattr("locale.getpreferredencoding", lambda: "foo")
        patcher.setattr("sys.stdout", MockEncoding(encoding))
        detected = detect_console_encoding()
        assert detected == "foo"
@pytest.mark.parametrize(
    "std,locale",
    [
        ["ascii", "ascii"],
        ["ascii", locale.Error],
        [AttributeError, "ascii"],
        [AttributeError, locale.Error],
        [OSError, "ascii"],
        [OSError, locale.Error],
    ],
)
def test_detect_console_encoding_fallback_to_default(monkeypatch, std, locale):
    """Fall back to the sys default encoding when other sources fail (GH 21552)."""
    # When both the stdout/stdin encoding and the locale preferred encoding
    # checks fail (or return 'ascii'), we should default to the sys default
    # encoding.

    def _preferred_encoding():
        # ``locale`` here is the parametrized value, not the module.
        return MockEncoding.raise_or_return(locale)

    with monkeypatch.context() as patcher:
        patcher.setattr("locale.getpreferredencoding", _preferred_encoding)
        patcher.setattr("sys.stdout", MockEncoding(std))
        patcher.setattr("sys.getdefaultencoding", lambda: "sysDefaultEncoding")
        detected = detect_console_encoding()
        assert detected == "sysDefaultEncoding"

View File

@@ -0,0 +1,229 @@
import pytest
import pandas._testing as tm
from pandas.io.formats.css import (
CSSResolver,
CSSWarning,
)
def assert_resolves(css, props, inherited=None):
    """Assert that resolving ``css`` yields exactly the mapping ``props``."""
    resolved = CSSResolver()(css, inherited=inherited)
    assert props == resolved
def assert_same_resolution(css1, css2, inherited=None):
    """Assert that two CSS strings resolve to equal results."""
    resolver = CSSResolver()
    first, second = (
        resolver(declarations, inherited=inherited) for declarations in (css1, css2)
    )
    assert first == second
@pytest.mark.parametrize(
    "name,norm,abnorm",
    [
        (
            "whitespace",
            "hello: world; foo: bar",
            " \t hello \t :\n world \n ; \n foo: \tbar\n\n",
        ),
        ("case", "hello: world; foo: bar", "Hello: WORLD; foO: bar"),
        ("empty-decl", "hello: world; foo: bar", "; hello: world;; foo: bar;\n; ;"),
        ("empty-list", "", ";"),
    ],
)
def test_css_parse_normalisation(name, norm, abnorm):
    """The irregular spelling resolves the same as the normalised one."""
    # ``name`` only labels the case; it takes no part in the check.
    assert_same_resolution(css1=norm, css2=abnorm)
@pytest.mark.parametrize(
    "invalid_css,remainder",
    [
        # No colon
        ("hello-world", ""),
        ("border-style: solid; hello-world", "border-style: solid"),
        (
            "border-style: solid; hello-world; font-weight: bold",
            "border-style: solid; font-weight: bold",
        ),
        # Unclosed string fail
        # Invalid size
        ("font-size: blah", "font-size: 1em"),
        ("font-size: 1a2b", "font-size: 1em"),
        ("font-size: 1e5pt", "font-size: 1em"),
        ("font-size: 1+6pt", "font-size: 1em"),
        ("font-size: 1unknownunit", "font-size: 1em"),
        ("font-size: 10", "font-size: 1em"),
        ("font-size: 10 pt", "font-size: 1em"),
    ],
)
def test_css_parse_invalid(invalid_css, remainder):
    """Invalid CSS warns and resolves the same as the listed remainder."""
    with tm.assert_produces_warning(CSSWarning):
        assert_same_resolution(css1=invalid_css, css2=remainder)
@pytest.mark.parametrize(
    "shorthand,expansions",
    [
        ("margin", ["margin-top", "margin-right", "margin-bottom", "margin-left"]),
        ("padding", ["padding-top", "padding-right", "padding-bottom", "padding-left"]),
        (
            "border-width",
            [
                "border-top-width",
                "border-right-width",
                "border-bottom-width",
                "border-left-width",
            ],
        ),
        (
            "border-color",
            [
                "border-top-color",
                "border-right-color",
                "border-bottom-color",
                "border-left-color",
            ],
        ),
        (
            "border-style",
            [
                "border-top-style",
                "border-right-style",
                "border-bottom-style",
                "border-left-style",
            ],
        ),
    ],
)
def test_css_side_shorthands(shorthand, expansions):
    """One to four shorthand values expand to the four per-side properties."""
    top, right, bottom, left = expansions
    # (shorthand value, expected per-side values in top/right/bottom/left order)
    cases = [
        ("1pt", ("1pt", "1pt", "1pt", "1pt")),
        ("1pt 4pt", ("1pt", "4pt", "1pt", "4pt")),
        ("1pt 4pt 2pt", ("1pt", "4pt", "2pt", "4pt")),
        ("1pt 4pt 2pt 0pt", ("1pt", "4pt", "2pt", "0pt")),
    ]
    for value, (exp_top, exp_right, exp_bottom, exp_left) in cases:
        assert_resolves(
            f"{shorthand}: {value}",
            {top: exp_top, right: exp_right, bottom: exp_bottom, left: exp_left},
        )

    # Five values: a warning is produced and nothing is resolved.
    with tm.assert_produces_warning(CSSWarning):
        assert_resolves(f"{shorthand}: 1pt 1pt 1pt 1pt 1pt", {})
@pytest.mark.parametrize(
    "style,inherited,equiv",
    [
        ("margin: 1px; margin: 2px", "", "margin: 2px"),
        ("margin: 1px", "margin: 2px", "margin: 1px"),
        ("margin: 1px; margin: inherit", "margin: 2px", "margin: 2px"),
        (
            "margin: 1px; margin-top: 2px",
            "",
            "margin-left: 1px; margin-right: 1px; "
            + "margin-bottom: 1px; margin-top: 2px",
        ),
        ("margin-top: 2px", "margin: 1px", "margin: 1px; margin-top: 2px"),
        ("margin: 1px", "margin-top: 2px", "margin: 1px"),
        (
            "margin: 1px; margin-top: inherit",
            "margin: 2px",
            "margin: 1px; margin-top: 2px",
        ),
    ],
)
def test_css_precedence(style, inherited, equiv):
    """``style`` resolved over ``inherited`` equals ``equiv`` resolved alone."""
    resolver = CSSResolver()
    inherited_props = resolver(inherited)
    resolved_style = resolver(style, inherited=inherited_props)
    resolved_equiv = resolver(equiv)
    assert resolved_style == resolved_equiv
@pytest.mark.parametrize(
    "style,equiv",
    [
        (
            "margin: 1px; margin-top: inherit",
            "margin-bottom: 1px; margin-right: 1px; margin-left: 1px",
        ),
        ("margin-top: inherit", ""),
        ("margin-top: initial", ""),
    ],
)
def test_css_none_absent(style, equiv):
    """``style`` resolves the same as ``equiv``, which omits the property."""
    assert_same_resolution(css1=style, css2=equiv)
@pytest.mark.parametrize(
    "size,resolved",
    [
        ("xx-small", "6pt"),
        ("x-small", f"{7.5:f}pt"),
        ("small", f"{9.6:f}pt"),
        ("medium", "12pt"),
        ("large", f"{13.5:f}pt"),
        ("x-large", "18pt"),
        ("xx-large", "24pt"),
        ("8px", "6pt"),
        ("1.25pc", "15pt"),
        (".25in", "18pt"),
        ("02.54cm", "72pt"),
        ("25.4mm", "72pt"),
        # This case was listed twice; the redundant duplicate is dropped.
        ("101.6q", "72pt"),
    ],
)
@pytest.mark.parametrize("relative_to", [None, "16pt"])  # invariant to inherited size
def test_css_absolute_font_size(size, relative_to, resolved):
    """Absolute font sizes resolve to points whatever the inherited size.

    Parameters
    ----------
    size : str
        The ``font-size`` value under test.
    relative_to : str or None
        Inherited ``font-size``, or None for no inherited properties.
    resolved : str
        Expected resolved ``font-size`` value.
    """
    inherited = None if relative_to is None else {"font-size": relative_to}
    assert_resolves(f"font-size: {size}", {"font-size": resolved}, inherited=inherited)
@pytest.mark.parametrize(
    "size,relative_to,resolved",
    [
        ("1em", None, "12pt"),
        ("1.0em", None, "12pt"),
        ("1.25em", None, "15pt"),
        ("1em", "16pt", "16pt"),
        ("1.0em", "16pt", "16pt"),
        ("1.25em", "16pt", "20pt"),
        ("1rem", "16pt", "12pt"),
        ("1.0rem", "16pt", "12pt"),
        ("1.25rem", "16pt", "15pt"),
        ("100%", None, "12pt"),
        ("125%", None, "15pt"),
        ("100%", "16pt", "16pt"),
        ("125%", "16pt", "20pt"),
        ("2ex", None, "12pt"),
        ("2.0ex", None, "12pt"),
        ("2.50ex", None, "15pt"),
        ("inherit", "16pt", "16pt"),
        ("smaller", None, "10pt"),
        ("smaller", "18pt", "15pt"),
        ("larger", None, f"{14.4:f}pt"),
        ("larger", "15pt", "18pt"),
    ],
)
def test_css_relative_font_size(size, relative_to, resolved):
    """Relative font sizes resolve to the expected value for each inherited size."""
    inherited = {"font-size": relative_to} if relative_to is not None else None
    declaration = f"font-size: {size}"
    assert_resolves(declaration, {"font-size": resolved}, inherited=inherited)

View File

@@ -0,0 +1,234 @@
import numpy as np
from pandas import DataFrame
import pandas._testing as tm
import pandas.io.formats.format as fmt
class TestEngFormatter:
    """Tests for ``fmt.EngFormatter`` and ``fmt.set_eng_float_format``."""

    def test_eng_float_formatter(self):
        """``DataFrame.to_string`` follows each ``set_eng_float_format`` setting."""
        df = DataFrame({"A": [1.41, 141.0, 14100, 1410000.0]})

        # set_eng_float_format changes global display state, so reset it in a
        # ``finally``: a failing assertion must not leak the format into
        # later tests.
        try:
            fmt.set_eng_float_format()
            result = df.to_string()
            expected = (
                " A\n"
                "0 1.410E+00\n"
                "1 141.000E+00\n"
                "2 14.100E+03\n"
                "3 1.410E+06"
            )
            assert result == expected

            fmt.set_eng_float_format(use_eng_prefix=True)
            result = df.to_string()
            expected = " A\n0 1.410\n1 141.000\n2 14.100k\n3 1.410M"
            assert result == expected

            fmt.set_eng_float_format(accuracy=0)
            result = df.to_string()
            expected = " A\n0 1E+00\n1 141E+00\n2 14E+03\n3 1E+06"
            assert result == expected
        finally:
            tm.reset_display_options()

    def compare(self, formatter, input, output):
        """Assert that ``formatter(input)`` equals ``output``."""
        formatted_input = formatter(input)
        assert formatted_input == output

    def compare_all(self, formatter, in_out):
        """
        Check a formatter against (number, expected_formatting) pairs.

        Parameters
        ----------
        formatter : EngFormatter
            The formatter under test.
        in_out : list of tuples
            Each tuple is ``(number, expected_formatting)``. It is tested
            that ``formatter(number) == expected_formatting``. *number*
            should be >= 0 because ``formatter(-number)`` is also tested,
            against *expected_formatting* with its first character replaced
            by ``"-"``.
        """
        for number, expected in in_out:
            self.compare(formatter, number, expected)
            self.compare(formatter, -number, "-" + expected[1:])

    def test_exponents_with_eng_prefix(self):
        """Every exponent from 10**-24 to 10**26 maps to the listed prefix."""
        formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True)
        f = np.sqrt(2)
        in_out = [
            (f * 10**-24, " 1.414y"),
            (f * 10**-23, " 14.142y"),
            (f * 10**-22, " 141.421y"),
            (f * 10**-21, " 1.414z"),
            (f * 10**-20, " 14.142z"),
            (f * 10**-19, " 141.421z"),
            (f * 10**-18, " 1.414a"),
            (f * 10**-17, " 14.142a"),
            (f * 10**-16, " 141.421a"),
            (f * 10**-15, " 1.414f"),
            (f * 10**-14, " 14.142f"),
            (f * 10**-13, " 141.421f"),
            (f * 10**-12, " 1.414p"),
            (f * 10**-11, " 14.142p"),
            (f * 10**-10, " 141.421p"),
            (f * 10**-9, " 1.414n"),
            (f * 10**-8, " 14.142n"),
            (f * 10**-7, " 141.421n"),
            (f * 10**-6, " 1.414u"),
            (f * 10**-5, " 14.142u"),
            (f * 10**-4, " 141.421u"),
            (f * 10**-3, " 1.414m"),
            (f * 10**-2, " 14.142m"),
            (f * 10**-1, " 141.421m"),
            (f * 10**0, " 1.414"),
            (f * 10**1, " 14.142"),
            (f * 10**2, " 141.421"),
            (f * 10**3, " 1.414k"),
            (f * 10**4, " 14.142k"),
            (f * 10**5, " 141.421k"),
            (f * 10**6, " 1.414M"),
            (f * 10**7, " 14.142M"),
            (f * 10**8, " 141.421M"),
            (f * 10**9, " 1.414G"),
            (f * 10**10, " 14.142G"),
            (f * 10**11, " 141.421G"),
            (f * 10**12, " 1.414T"),
            (f * 10**13, " 14.142T"),
            (f * 10**14, " 141.421T"),
            (f * 10**15, " 1.414P"),
            (f * 10**16, " 14.142P"),
            (f * 10**17, " 141.421P"),
            (f * 10**18, " 1.414E"),
            (f * 10**19, " 14.142E"),
            (f * 10**20, " 141.421E"),
            (f * 10**21, " 1.414Z"),
            (f * 10**22, " 14.142Z"),
            (f * 10**23, " 141.421Z"),
            (f * 10**24, " 1.414Y"),
            (f * 10**25, " 14.142Y"),
            (f * 10**26, " 141.421Y"),
        ]
        self.compare_all(formatter, in_out)

    def test_exponents_without_eng_prefix(self):
        """Every exponent from 10**-24 to 10**26 maps to the listed E-notation."""
        formatter = fmt.EngFormatter(accuracy=4, use_eng_prefix=False)
        f = np.pi
        in_out = [
            (f * 10**-24, " 3.1416E-24"),
            (f * 10**-23, " 31.4159E-24"),
            (f * 10**-22, " 314.1593E-24"),
            (f * 10**-21, " 3.1416E-21"),
            (f * 10**-20, " 31.4159E-21"),
            (f * 10**-19, " 314.1593E-21"),
            (f * 10**-18, " 3.1416E-18"),
            (f * 10**-17, " 31.4159E-18"),
            (f * 10**-16, " 314.1593E-18"),
            (f * 10**-15, " 3.1416E-15"),
            (f * 10**-14, " 31.4159E-15"),
            (f * 10**-13, " 314.1593E-15"),
            (f * 10**-12, " 3.1416E-12"),
            (f * 10**-11, " 31.4159E-12"),
            (f * 10**-10, " 314.1593E-12"),
            (f * 10**-9, " 3.1416E-09"),
            (f * 10**-8, " 31.4159E-09"),
            (f * 10**-7, " 314.1593E-09"),
            (f * 10**-6, " 3.1416E-06"),
            (f * 10**-5, " 31.4159E-06"),
            (f * 10**-4, " 314.1593E-06"),
            (f * 10**-3, " 3.1416E-03"),
            (f * 10**-2, " 31.4159E-03"),
            (f * 10**-1, " 314.1593E-03"),
            (f * 10**0, " 3.1416E+00"),
            (f * 10**1, " 31.4159E+00"),
            (f * 10**2, " 314.1593E+00"),
            (f * 10**3, " 3.1416E+03"),
            (f * 10**4, " 31.4159E+03"),
            (f * 10**5, " 314.1593E+03"),
            (f * 10**6, " 3.1416E+06"),
            (f * 10**7, " 31.4159E+06"),
            (f * 10**8, " 314.1593E+06"),
            (f * 10**9, " 3.1416E+09"),
            (f * 10**10, " 31.4159E+09"),
            (f * 10**11, " 314.1593E+09"),
            (f * 10**12, " 3.1416E+12"),
            (f * 10**13, " 31.4159E+12"),
            (f * 10**14, " 314.1593E+12"),
            (f * 10**15, " 3.1416E+15"),
            (f * 10**16, " 31.4159E+15"),
            (f * 10**17, " 314.1593E+15"),
            (f * 10**18, " 3.1416E+18"),
            (f * 10**19, " 31.4159E+18"),
            (f * 10**20, " 314.1593E+18"),
            (f * 10**21, " 3.1416E+21"),
            (f * 10**22, " 31.4159E+21"),
            (f * 10**23, " 314.1593E+21"),
            (f * 10**24, " 3.1416E+24"),
            (f * 10**25, " 31.4159E+24"),
            (f * 10**26, " 314.1593E+24"),
        ]
        self.compare_all(formatter, in_out)

    def test_rounding(self):
        """Values are rounded to the configured accuracy (3, 1 and 0 digits)."""
        formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True)
        in_out = [
            (5.55555, " 5.556"),
            (55.5555, " 55.556"),
            (555.555, " 555.555"),
            (5555.55, " 5.556k"),
            (55555.5, " 55.556k"),
            (555555, " 555.555k"),
        ]
        self.compare_all(formatter, in_out)

        formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True)
        in_out = [
            (5.55555, " 5.6"),
            (55.5555, " 55.6"),
            (555.555, " 555.6"),
            (5555.55, " 5.6k"),
            (55555.5, " 55.6k"),
            (555555, " 555.6k"),
        ]
        self.compare_all(formatter, in_out)

        formatter = fmt.EngFormatter(accuracy=0, use_eng_prefix=True)
        in_out = [
            (5.55555, " 6"),
            (55.5555, " 56"),
            (555.555, " 556"),
            (5555.55, " 6k"),
            (55555.5, " 56k"),
            (555555, " 556k"),
        ]
        self.compare_all(formatter, in_out)

        formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True)
        result = formatter(0)
        assert result == " 0.000"

    def test_nan(self):
        """NaN formats as "NaN" directly and inside a pivot table (Issue #11981)."""
        formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True)
        result = formatter(np.nan)
        assert result == "NaN"

        df = DataFrame(
            {
                "a": [1.5, 10.3, 20.5],
                "b": [50.3, 60.67, 70.12],
                "c": [100.2, 101.33, 120.33],
            }
        )
        pt = df.pivot_table(values="a", index="b", columns="c")
        # Global display state again: reset it even when the assertion fails.
        try:
            fmt.set_eng_float_format(accuracy=1)
            result = pt.to_string()
            assert "NaN" in result
        finally:
            tm.reset_display_options()

    def test_inf(self):
        """Infinity formats as "inf" (Issue #11981)."""
        formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True)
        result = formatter(np.inf)
        assert result == "inf"

View File

@@ -0,0 +1,492 @@
from io import StringIO
import re
from string import ascii_uppercase as uppercase
import sys
import textwrap
import numpy as np
import pytest
from pandas.compat import (
IS64,
PYPY,
)
from pandas import (
CategoricalIndex,
DataFrame,
MultiIndex,
Series,
date_range,
option_context,
)
@pytest.fixture
def duplicate_columns_frame():
    """Dataframe with duplicate column names."""
    values = np.random.randn(1500, 4)
    labels = ["a", "a", "b", "b"]
    return DataFrame(values, columns=labels)
def test_info_empty():
    """info() on an empty frame prints the class, the index summary and a marker."""
    sink = StringIO()
    DataFrame().info(buf=sink)
    expected = (
        "<class 'pandas.core.frame.DataFrame'>\n"
        "Index: 0 entries\n"
        "Empty DataFrame"
    )
    assert sink.getvalue() == expected
def test_info_categorical_column_smoke_test():
    """Smoke test: info() works with a categorical column, before and after filtering."""
    n = 2500
    df = DataFrame({"int64": np.random.randint(100, size=n)})
    codes = np.random.randint(0, 10, size=n)
    labels = np.array(list("abcdefghij")).take(codes)
    df["category"] = Series(labels).astype("category")
    df.isna()
    df.info(buf=StringIO())

    only_d = df[df["category"] == "d"]
    only_d.info(buf=StringIO())
@pytest.mark.parametrize(
    "fixture_func_name",
    [
        "int_frame",
        "float_frame",
        "datetime_frame",
        "duplicate_columns_frame",
    ],
)
def test_info_smoke_test(fixture_func_name, request):
    """info() on each named fixture frame writes more than ten lines."""
    frame = request.getfixturevalue(fixture_func_name)
    sink = StringIO()
    frame.info(buf=sink)
    line_count = len(sink.getvalue().splitlines())
    assert line_count > 10
@pytest.mark.parametrize(
    "num_columns, max_info_columns, verbose",
    [
        (10, 100, True),
        (10, 11, True),
        (10, 10, True),
        (10, 9, False),
        (10, 1, False),
    ],
)
def test_info_default_verbose_selection(num_columns, max_info_columns, verbose):
    """The default verbosity equals the explicit ``verbose`` for each option value."""
    frame = DataFrame(np.random.randn(5, num_columns))
    default_out, explicit_out = StringIO(), StringIO()
    with option_context("display.max_info_columns", max_info_columns):
        frame.info(buf=default_out)
        frame.info(buf=explicit_out, verbose=verbose)
    assert default_out.getvalue() == explicit_out.getvalue()
def test_info_verbose_check_header_separator_body():
    """Verbose info() of a 1001-column frame has an aligned header and numbered rows."""
    size = 1001
    # Table rows start after the class line, index summary, column-count line,
    # header and separator.
    start = 5
    frame = DataFrame(np.random.randn(3, size))
    buf = StringIO()
    frame.info(verbose=True, buf=buf)
    res = buf.getvalue()

    # Cells are left-justified to their column width and joined by two spaces;
    # the "#" column is five wide here because the widest label is " 1000".
    header = " #     Column  Dtype  \n---    ------  -----  "
    assert header in res

    body = res.splitlines(keepends=True)[start : start + size]
    assert len(body) == size
    for position, line in enumerate(body):
        assert line.startswith(f" {position} ")
@pytest.mark.parametrize(
    "size, header_exp, separator_exp, first_line_exp, last_line_exp",
    [
        (
            4,
            " #   Column  Non-Null Count  Dtype  ",
            "---  ------  --------------  -----  ",
            " 0   0       3 non-null      float64",
            " 3   3       3 non-null      float64",
        ),
        (
            11,
            " #   Column  Non-Null Count  Dtype  ",
            "---  ------  --------------  -----  ",
            " 0   0       3 non-null      float64",
            " 10  10      3 non-null      float64",
        ),
        (
            101,
            " #    Column  Non-Null Count  Dtype  ",
            "---   ------  --------------  -----  ",
            " 0    0       3 non-null      float64",
            " 100  100     3 non-null      float64",
        ),
        (
            1001,
            " #     Column  Non-Null Count  Dtype  ",
            "---    ------  --------------  -----  ",
            " 0     0       3 non-null      float64",
            " 1000  1000    3 non-null      float64",
        ),
        (
            10001,
            " #      Column  Non-Null Count  Dtype  ",
            "---     ------  --------------  -----  ",
            " 0      0       3 non-null      float64",
            " 10000  10000   3 non-null      float64",
        ),
    ],
)
def test_info_verbose_with_counts_spacing(
    size, header_exp, separator_exp, first_line_exp, last_line_exp
):
    """Test header column, spacer, first line and last line in verbose mode.

    The expected rows are whitespace-sensitive: every cell is left-justified to
    its column width and columns are separated by two spaces, so the "#" column
    widens as the number of columns grows.
    """
    frame = DataFrame(np.random.randn(3, size))
    with StringIO() as buf:
        frame.info(verbose=True, show_counts=True, buf=buf)
        all_lines = buf.getvalue().splitlines()
    # Here table would contain only header, separator and table lines
    # dframe repr, index summary, memory usage and dtypes are excluded
    table = all_lines[3:-2]
    header, separator, first_line, *_, last_line = table
    assert header == header_exp
    assert separator == separator_exp
    assert first_line == first_line_exp
    assert last_line == last_line_exp
def test_info_memory():
    """info() reports the frame's summed memory usage on its last line."""
    # https://github.com/pandas-dev/pandas/issues/21056
    df = DataFrame({"a": Series([1, 2], dtype="i8")})
    buf = StringIO()
    df.info(buf=buf)
    result = buf.getvalue()
    # Named memory_bytes (not ``bytes``) so the builtin is not shadowed.
    memory_bytes = float(df.memory_usage().sum())
    # Built line by line so the whitespace-sensitive table rows stay explicit.
    expected = "\n".join(
        [
            "<class 'pandas.core.frame.DataFrame'>",
            "RangeIndex: 2 entries, 0 to 1",
            "Data columns (total 1 columns):",
            " #   Column  Non-Null Count  Dtype",
            "---  ------  --------------  -----",
            " 0   a       2 non-null      int64",
            "dtypes: int64(1)",
            f"memory usage: {memory_bytes} bytes",
            "",
        ]
    )
    assert result == expected
def test_info_wide():
    """A 101-column frame prints in full via ``max_cols`` or the display option."""
    df = DataFrame(np.random.randn(5, 101))
    # The default call only has to run without raising.
    df.info(buf=StringIO())

    explicit = StringIO()
    df.info(buf=explicit, max_cols=101)
    expected = explicit.getvalue()
    assert len(expected.splitlines()) > 100

    via_option = StringIO()
    with option_context("display.max_info_columns", 101):
        df.info(buf=via_option)
    assert via_option.getvalue() == expected
def test_info_duplicate_columns_shows_correct_dtypes():
    """Columns sharing a name are still listed with their own dtype."""
    # GH11761
    buf = StringIO()
    frame = DataFrame([[1, 2.0]], columns=["a", "a"])
    frame.info(buf=buf)
    lines = buf.getvalue().splitlines(True)
    # The Dtype column is as wide as "float64", so "int64" is padded by two.
    assert lines[5] == " 0   a       1 non-null      int64  \n"
    assert lines[6] == " 1   a       1 non-null      float64\n"
def test_info_shows_column_dtypes():
    """Every column's dtype appears in its info() table row."""
    dtypes = [
        "int64",
        "float64",
        "datetime64[ns]",
        "timedelta64[ns]",
        "complex128",
        "object",
        "bool",
    ]
    n = 10
    data = {
        i: np.random.randint(2, size=n).astype(dtype)
        for i, dtype in enumerate(dtypes)
    }
    df = DataFrame(data)
    buf = StringIO()
    df.info(buf=buf)
    res = buf.getvalue()
    # The Dtype column is padded to the widest dtype name ("timedelta64[ns]").
    header = (
        " #   Column  Non-Null Count  Dtype          \n"
        "---  ------  --------------  -----          "
    )
    assert header in res
    for i, dtype in enumerate(dtypes):
        name = f" {i:d}   {i:d}       {n:d} non-null     {dtype}"
        assert name in res
def test_info_max_cols():
    """Line counts of info() under different max_info_columns/verbose/max_cols mixes."""
    df = DataFrame(np.random.randn(10, 5))

    def count_lines(option_value, **info_kwargs):
        # Render under the given option value and count the non-blank-trimmed lines.
        with option_context("max_info_columns", option_value):
            buf = StringIO()
            df.info(buf=buf, **info_kwargs)
            res = buf.getvalue()
        return len(res.strip().split("\n"))

    for len_, verbose in [(5, None), (5, False), (12, True)]:
        # For verbose always      ^ setting  ^ summarize ^ full output
        assert count_lines(4, verbose=verbose) == len_

    for len_, verbose in [(12, None), (5, False), (12, True)]:
        # max_cols not exceeded
        assert count_lines(5, verbose=verbose) == len_

    for len_, max_cols in [(12, 5), (5, 4)]:
        # setting truncates
        assert count_lines(4, max_cols=max_cols) == len_
        # setting wouldn't truncate
        assert count_lines(5, max_cols=max_cols) == len_
def test_info_memory_usage():
    """Memory-usage line of info(), its "+" qualifier, and memory_usage() sizes."""
    # Ensure memory usage is displayed, when asserted, on the last line
    lower_bound = r"memory usage: [^+]+\+"
    dtypes = [
        "int64",
        "float64",
        "datetime64[ns]",
        "timedelta64[ns]",
        "complex128",
        "object",
        "bool",
    ]
    n = 10
    data = {
        i: np.random.randint(2, size=n).astype(dtype)
        for i, dtype in enumerate(dtypes)
    }
    df = DataFrame(data)
    # One buffer is shared by every info() call; each check reads its last line.
    buf = StringIO()

    def last_line():
        return buf.getvalue().splitlines()[-1]

    # display memory usage case
    df.info(buf=buf, memory_usage=True)
    assert "memory usage: " in last_line()

    # do not display memory usage case
    df.info(buf=buf, memory_usage=False)
    assert "memory usage: " not in last_line()

    df.info(buf=buf, memory_usage=True)
    # memory usage is a lower bound, so print it as XYZ+ MB
    assert re.match(lower_bound, last_line())

    df.iloc[:, :5].info(buf=buf, memory_usage=True)
    # excluded column with object dtype, so estimate is accurate
    assert not re.match(lower_bound, last_line())

    # Test a DataFrame with duplicate columns
    dtypes = ["int64", "int64", "int64", "float64"]
    n = 100
    data = {
        i: np.random.randint(2, size=n).astype(dtype)
        for i, dtype in enumerate(dtypes)
    }
    df = DataFrame(data)
    df.columns = dtypes

    df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
    df_with_object_index.info(buf=buf, memory_usage=True)
    assert re.match(lower_bound, last_line())

    df_with_object_index.info(buf=buf, memory_usage="deep")
    assert re.match(r"memory usage: [^+]+$", last_line())

    # Ensure df size is as expected
    # (cols * rows * bytes) + index size
    actual_size = df.memory_usage().sum()
    expected_size = len(dtypes) * n * 8 + df.index.nbytes
    assert actual_size == expected_size

    # Ensure number of cols in memory_usage is the same as df
    entries = np.size(df.columns.values) + 1  # index=True; default
    assert entries == np.size(df.memory_usage())

    # assert deep works only on object
    assert df.memory_usage().sum() == df.memory_usage(deep=True).sum()

    # test for validity
    DataFrame(1, index=["a"], columns=["A"]).memory_usage(index=True)
    DataFrame(1, index=["a"], columns=["A"]).index.nbytes
    df = DataFrame(
        data=1, index=MultiIndex.from_product([["a"], range(1000)]), columns=["A"]
    )
    df.index.nbytes
    df.memory_usage(index=True)
    df.index.values.nbytes

    mem = df.memory_usage(deep=True).sum()
    assert mem > 0
@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result")
def test_info_memory_usage_deep_not_pypy():
    """Off PyPy, deep introspection reports more memory for object data."""
    df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
    deep_total = df_with_object_index.memory_usage(index=True, deep=True).sum()
    shallow_total = df_with_object_index.memory_usage(index=True).sum()
    assert deep_total > shallow_total

    df_object = DataFrame({"a": ["a"]})
    assert df_object.memory_usage(deep=True).sum() > df_object.memory_usage().sum()
@pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result")
def test_info_memory_usage_deep_pypy():
    """On PyPy, deep and shallow memory usage are expected to be equal."""
    df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
    deep_total = df_with_object_index.memory_usage(index=True, deep=True).sum()
    shallow_total = df_with_object_index.memory_usage(index=True).sum()
    assert deep_total == shallow_total

    df_object = DataFrame({"a": ["a"]})
    assert df_object.memory_usage(deep=True).sum() == df_object.memory_usage().sum()
@pytest.mark.skipif(PYPY, reason="PyPy getsizeof() fails by design")
def test_usage_via_getsizeof():
    """sys.getsizeof(df) stays within 100 bytes of the deep memory usage."""
    index = MultiIndex.from_product([["a"], range(1000)])
    df = DataFrame(data=1, index=index, columns=["A"])
    mem = df.memory_usage(deep=True).sum()
    # sys.getsizeof will call the .memory_usage with
    # deep=True, and add on some GC overhead
    overhead = mem - sys.getsizeof(df)
    assert abs(overhead) < 100
def test_info_memory_usage_qualified():
    """The "+" qualifier appears only when the index holds object (string) data."""
    cases = [
        ([1, 2, 3], False),
        (list("ABC"), True),
        (MultiIndex.from_product([range(3), range(3)]), False),
        (MultiIndex.from_product([range(3), ["foo", "bar"]]), True),
    ]
    for index, expect_plus in cases:
        buf = StringIO()
        df = DataFrame(1, columns=list("ab"), index=index)
        df.info(buf=buf)
        if expect_plus:
            assert "+" in buf.getvalue()
        else:
            assert "+" not in buf.getvalue()
def test_info_memory_usage_bug_on_multiindex():
    """Deep memory usage of a MultiIndex frame versus its unstacked form."""
    # GH 14308
    # memory usage introspection should not materialize .values

    def deep_total(frame):
        return frame.memory_usage(deep=True).sum()

    n_dates = 100
    n_ids = len(uppercase)
    levels = [list(uppercase), date_range("20160101", periods=n_dates)]
    index = MultiIndex.from_product(levels, names=["id", "date"])
    df = DataFrame({"value": np.random.randn(n_dates * n_ids)}, index=index)

    unstacked = df.unstack("id")
    assert df.values.nbytes == unstacked.values.nbytes
    assert deep_total(df) > deep_total(unstacked)

    # high upper bound
    assert deep_total(unstacked) - deep_total(df) < 2000
def test_info_categorical():
    """Smoke test: info() works when index and columns are CategoricalIndex."""
    # GH14298
    labels = CategoricalIndex(["a", "b"])
    frame = DataFrame(np.zeros((2, 2)), index=labels, columns=labels)
    frame.info(buf=StringIO())
@pytest.mark.xfail(not IS64, reason="GH 36579: fail on 32-bit system")
def test_info_int_columns():
    """info() renders integer column labels in the verbose table."""
    # GH#37245
    df = DataFrame({1: [1, 2], 2: [2, 3]}, index=["A", "B"])
    buf = StringIO()
    df.info(show_counts=True, buf=buf)
    result = buf.getvalue()
    # Built line by line so the whitespace-sensitive table rows stay explicit.
    expected = "\n".join(
        [
            "<class 'pandas.core.frame.DataFrame'>",
            "Index: 2 entries, A to B",
            "Data columns (total 2 columns):",
            " #   Column  Non-Null Count  Dtype",
            "---  ------  --------------  -----",
            " 0   1       2 non-null      int64",
            " 1   2       2 non-null      int64",
            "dtypes: int64(2)",
            "memory usage: 48.0+ bytes",
            "",
        ]
    )
    assert result == expected

View File

@@ -0,0 +1,200 @@
import numpy as np
import pytest
import pandas._config.config as cf
import pandas as pd
import pandas.io.formats.format as fmt
import pandas.io.formats.printing as printing
def test_adjoin():
    """adjoin() lays lists out as columns separated by ``space`` blanks."""
    data = [["a", "b", "c"], ["dd", "ee", "ff"], ["ggg", "hhh", "iii"]]
    # Every column but the last is padded to its widest entry plus two spaces.
    expected = "a  dd  ggg\nb  ee  hhh\nc  ff  iii"

    adjoined = printing.adjoin(2, *data)

    assert adjoined == expected
def test_repr_binary_type():
    """pprint_thing quotes a string only when ``quote_strings`` is set."""
    import string

    letters = string.ascii_letters
    # Round-trip through the configured display encoding. str.encode replaces
    # the old ``bytes(letters)`` fallback, which cannot succeed on Python 3.
    raw = letters.encode(cf.get_option("display.encoding"))
    text = raw.decode("utf-8")

    res = printing.pprint_thing(text, quote_strings=True)
    assert res == repr(text)
    res = printing.pprint_thing(text, quote_strings=False)
    assert res == text
class TestFormattBase:
    """Text layout helpers: adjoin/justify/len with and without East Asian widths.

    The expected strings are whitespace- and width-sensitive: wide characters
    such as "あ" count as two columns for EastAsianTextAdjustment, half-width
    katakana such as "ﾊﾟﾝﾀﾞ" count as one column per code point.
    """

    def test_adjoin(self):
        data = [["a", "b", "c"], ["dd", "ee", "ff"], ["ggg", "hhh", "iii"]]
        expected = "a  dd  ggg\nb  ee  hhh\nc  ff  iii"

        adjoined = printing.adjoin(2, *data)

        assert adjoined == expected

    def test_adjoin_unicode(self):
        data = [["あ", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "hhh", "いいい"]]
        # Plain adjoin pads by code-point count, ignoring display width.
        expected = "あ  dd  ggg\nb  ええ  hhh\nc  ff  いいい"
        adjoined = printing.adjoin(2, *data)
        assert adjoined == expected

        adj = fmt.EastAsianTextAdjustment()

        # Width-aware padding: "あ" already occupies two display columns.
        expected = "あ  dd    ggg\nb   ええ  hhh\nc   ff    いいい"
        adjoined = adj.adjoin(2, *data)
        assert adjoined == expected
        cols = adjoined.split("\n")
        assert adj.len(cols[0]) == 13
        assert adj.len(cols[1]) == 13
        assert adj.len(cols[2]) == 16

        expected = (
            "あ       dd         ggg\n"
            "b        ええ       hhh\n"
            "c        ff         いいい"
        )
        adjoined = adj.adjoin(7, *data)
        assert adjoined == expected
        cols = adjoined.split("\n")
        assert adj.len(cols[0]) == 23
        assert adj.len(cols[1]) == 23
        assert adj.len(cols[2]) == 26

    def test_justify(self):
        adj = fmt.EastAsianTextAdjustment()

        def just(x, *args, **kwargs):
            # wrapper to test single str
            return adj.justify([x], *args, **kwargs)[0]

        assert just("abc", 5, mode="left") == "abc  "
        assert just("abc", 5, mode="center") == " abc "
        assert just("abc", 5, mode="right") == "  abc"

        # "パンダ" is six columns wide, so a target width of 5 adds no padding.
        assert just("パンダ", 5, mode="left") == "パンダ"
        assert just("パンダ", 5, mode="center") == "パンダ"
        assert just("パンダ", 5, mode="right") == "パンダ"

        assert just("パンダ", 10, mode="left") == "パンダ    "
        assert just("パンダ", 10, mode="center") == "  パンダ  "
        assert just("パンダ", 10, mode="right") == "    パンダ"

    def test_east_asian_len(self):
        adj = fmt.EastAsianTextAdjustment()

        assert adj.len("abc") == 3

        # Full-width katakana: two columns per character.
        assert adj.len("パンダ") == 6
        # Half-width katakana (five code points): one column each.
        assert adj.len("ﾊﾟﾝﾀﾞ") == 5
        assert adj.len("パンダpanda") == 11
        assert adj.len("ﾊﾟﾝﾀﾞpanda") == 10

    def test_ambiguous_width(self):
        adj = fmt.EastAsianTextAdjustment()
        assert adj.len("¡¡ab") == 4

        with cf.option_context("display.unicode.ambiguous_as_wide", True):
            adj = fmt.EastAsianTextAdjustment()
            assert adj.len("¡¡ab") == 6

        data = [["あ", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "¡¡ab", "いいい"]]
        expected = "あ  dd    ggg \nb   ええ  ¡¡ab\nc   ff    いいい"
        adjoined = adj.adjoin(2, *data)
        assert adjoined == expected
class TestTableSchemaRepr:
    """IPython display-formatter behaviour of the table-schema repr option."""

    @pytest.mark.filterwarnings(
        "ignore:.*signature may therefore change.*:FutureWarning"
    )
    def test_publishes(self, ip):
        ipython = ip.instance(config=ip.config)
        df = pd.DataFrame({"A": [1, 2]})
        objects = [df["A"], df, df]  # dataframe / series
        expected_keys = [
            {"text/plain", "application/vnd.dataresource+json"},
            {"text/plain", "text/html", "application/vnd.dataresource+json"},
        ]

        opt = pd.option_context("display.html.table_schema", True)
        for obj, expected in zip(objects, expected_keys):
            with opt:
                formatted = ipython.display_formatter.format(obj)
            assert set(formatted[0].keys()) == expected

        with_latex = pd.option_context("display.latex.repr", True)

        # NOTE(review): ``obj`` is the last object from the loop above; the
        # nesting here is inferred from the statement order - confirm.
        with opt, with_latex:
            formatted = ipython.display_formatter.format(obj)

        expected = {
            "text/plain",
            "text/html",
            "text/latex",
            "application/vnd.dataresource+json",
        }
        assert set(formatted[0].keys()) == expected

    def test_publishes_not_implemented(self, ip):
        # column MultiIndex
        # GH 15996
        midx = pd.MultiIndex.from_product([["A", "B"], ["a", "b", "c"]])
        df = pd.DataFrame(np.random.randn(5, len(midx)), columns=midx)

        with pd.option_context("display.html.table_schema", True):
            formatted = ip.instance(config=ip.config).display_formatter.format(df)

        assert set(formatted[0].keys()) == {"text/plain", "text/html"}

    def test_config_on(self):
        df = pd.DataFrame({"A": [1, 2]})
        with pd.option_context("display.html.table_schema", True):
            result = df._repr_data_resource_()

        assert result is not None

    def test_config_default_off(self):
        df = pd.DataFrame({"A": [1, 2]})
        with pd.option_context("display.html.table_schema", False):
            result = df._repr_data_resource_()

        assert result is None

    def test_enable_data_resource_formatter(self, ip):
        # GH 10491
        formatters = ip.instance(config=ip.config).display_formatter.formatters
        mimetype = "application/vnd.dataresource+json"

        with pd.option_context("display.html.table_schema", True):
            assert mimetype in formatters
            assert formatters[mimetype].enabled

        # still there, just disabled
        assert mimetype in formatters
        assert not formatters[mimetype].enabled

        # able to re-set
        with pd.option_context("display.html.table_schema", True):
            assert mimetype in formatters
            assert formatters[mimetype].enabled
            # smoke test that it works
            ip.instance(config=ip.config).display_formatter.format(cf)

View File

@@ -0,0 +1,179 @@
from io import StringIO
from string import ascii_uppercase as uppercase
import textwrap
import numpy as np
import pytest
from pandas.compat import PYPY
from pandas import (
CategoricalIndex,
MultiIndex,
Series,
date_range,
)
def test_info_categorical_column_just_works():
    """Smoke test: Series.info() works on categorical data, before and after filtering."""
    n = 2500
    codes = np.random.randint(0, 10, size=n)
    values = np.array(list("abcdefghij")).take(codes)
    ser = Series(values).astype("category")
    ser.isna()
    ser.info(buf=StringIO())

    only_d = ser[ser == "d"]
    only_d.info(buf=StringIO())
def test_info_categorical():
    """Smoke test: Series.info() works with a CategoricalIndex."""
    # GH14298
    labels = CategoricalIndex(["a", "b"])
    ser = Series(np.zeros(2), index=labels)
    ser.info(buf=StringIO())
@pytest.mark.parametrize("verbose", [True, False])
def test_info_series(lexsorted_two_level_string_multiindex, verbose):
    """Series.info() output with and without the verbose table."""
    index = lexsorted_two_level_string_multiindex
    ser = Series(range(len(index)), index=index, name="sth")
    buf = StringIO()
    ser.info(verbose=verbose, buf=buf)
    result = buf.getvalue()

    # Built line by line so the whitespace-sensitive table rows stay explicit.
    lines = [
        "<class 'pandas.core.series.Series'>",
        "MultiIndex: 10 entries, ('foo', 'one') to ('qux', 'three')",
    ]
    if verbose:
        lines += [
            "Series name: sth",
            "Non-Null Count  Dtype",
            "--------------  -----",
            "10 non-null     int64",
        ]
    lines += [
        "dtypes: int64(1)",
        f"memory usage: {ser.memory_usage()}.0+ bytes",
        "",
    ]
    expected = "\n".join(lines)
    assert result == expected
def test_info_memory():
    """Series.info() reports the series' memory usage on its last line."""
    s = Series([1, 2], dtype="i8")
    buf = StringIO()
    s.info(buf=buf)
    result = buf.getvalue()
    memory_bytes = float(s.memory_usage())
    # Built line by line so the whitespace-sensitive table rows stay explicit.
    expected = "\n".join(
        [
            "<class 'pandas.core.series.Series'>",
            "RangeIndex: 2 entries, 0 to 1",
            "Series name: None",
            "Non-Null Count  Dtype",
            "--------------  -----",
            "2 non-null      int64",
            "dtypes: int64(1)",
            f"memory usage: {memory_bytes} bytes",
            "",
        ]
    )
    assert result == expected
def test_info_wide():
    """Series.info() rejects the DataFrame-only ``max_cols`` argument."""
    ser = Series(np.random.randn(101))
    expected_message = (
        "Argument `max_cols` can only be passed in DataFrame.info, not Series.info"
    )
    with pytest.raises(ValueError, match=expected_message):
        ser.info(max_cols=1)
def test_info_shows_dtypes():
    """Series.info() shows the non-null count and dtype for each tested dtype."""
    dtypes = [
        "int64",
        "float64",
        "datetime64[ns]",
        "timedelta64[ns]",
        "complex128",
        "object",
        "bool",
    ]
    n = 10
    for dtype in dtypes:
        s = Series(np.random.randint(2, size=n).astype(dtype))
        buf = StringIO()
        s.info(buf=buf)
        res = buf.getvalue()
        # The count cell is padded to the "Non-Null Count" header width and
        # followed by the two-space column gap.
        name = f"{n:d} non-null     {dtype}"
        assert name in res
@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result")
def test_info_memory_usage_deep_not_pypy():
    """Off PyPy, deep introspection reports more memory for object data."""
    s_with_object_index = Series({"a": [1]}, index=["foo"])
    deep = s_with_object_index.memory_usage(index=True, deep=True)
    shallow = s_with_object_index.memory_usage(index=True)
    assert deep > shallow

    s_object = Series({"a": ["a"]})
    assert s_object.memory_usage(deep=True) > s_object.memory_usage()
@pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result")
def test_info_memory_usage_deep_pypy():
    """On PyPy, deep and shallow memory usage are expected to be equal."""
    s_with_object_index = Series({"a": [1]}, index=["foo"])
    deep = s_with_object_index.memory_usage(index=True, deep=True)
    shallow = s_with_object_index.memory_usage(index=True)
    assert deep == shallow

    s_object = Series({"a": ["a"]})
    assert s_object.memory_usage(deep=True) == s_object.memory_usage()
@pytest.mark.parametrize(
    "series, plus",
    [
        (Series(1, index=[1, 2, 3]), False),
        (Series(1, index=list("ABC")), True),
        (Series(1, index=MultiIndex.from_product([range(3), range(3)])), False),
        (
            Series(1, index=MultiIndex.from_product([range(3), ["foo", "bar"]])),
            True,
        ),
    ],
)
def test_info_memory_usage_qualified(series, plus):
    """The "+" qualifier appears exactly for the cases flagged with ``plus``."""
    buf = StringIO()
    series.info(buf=buf)
    has_plus = "+" in buf.getvalue()
    if plus:
        assert has_plus
    else:
        assert not has_plus
def test_info_memory_usage_bug_on_multiindex():
    """Deep memory usage of a MultiIndex series versus its unstacked frame."""
    # GH 14308
    # memory usage introspection should not materialize .values
    n_dates = 100
    n_ids = len(uppercase)
    levels = [list(uppercase), date_range("20160101", periods=n_dates)]
    index = MultiIndex.from_product(levels, names=["id", "date"])
    ser = Series(np.random.randn(n_dates * n_ids), index=index)

    unstacked = ser.unstack("id")
    assert ser.values.nbytes == unstacked.values.nbytes
    assert ser.memory_usage(deep=True) > unstacked.memory_usage(deep=True).sum()

    # high upper bound
    gap = unstacked.memory_usage(deep=True).sum() - ser.memory_usage(deep=True)
    assert gap < 2000

View File

@@ -0,0 +1,717 @@
import io
import os
from pathlib import Path
import sys
from zipfile import ZipFile
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
compat,
)
import pandas._testing as tm
import pandas.io.common as icom
class TestToCSV:
def test_to_csv_with_single_column(self):
# see gh-18676, https://bugs.python.org/issue32255
#
# Python's CSV library adds an extraneous '""'
# before the newline when the NaN-value is in
# the first row. Otherwise, only the newline
# character is added. This behavior is inconsistent
# and was patched in https://bugs.python.org/pull_request4672.
df1 = DataFrame([None, 1])
expected1 = """\
""
1.0
"""
with tm.ensure_clean("test.csv") as path:
df1.to_csv(path, header=None, index=None)
with open(path) as f:
assert f.read() == expected1
df2 = DataFrame([1, None])
expected2 = """\
1.0
""
"""
with tm.ensure_clean("test.csv") as path:
df2.to_csv(path, header=None, index=None)
with open(path) as f:
assert f.read() == expected2
def test_to_csv_defualt_encoding(self):
# GH17097
df = DataFrame({"col": ["AAAAA", "ÄÄÄÄÄ", "ßßßßß", "聞聞聞聞聞"]})
with tm.ensure_clean("test.csv") as path:
# the default to_csv encoding is uft-8.
df.to_csv(path)
tm.assert_frame_equal(pd.read_csv(path, index_col=0), df)
def test_to_csv_quotechar(self):
    """QUOTE_ALL output uses the default or a custom quote character; None is rejected."""
    df = DataFrame({"col": [1, 2]})

    default_quoted = '"","col"\n"0","1"\n"1","2"\n'
    with tm.ensure_clean("test.csv") as path:
        df.to_csv(path, quoting=1)  # 1=QUOTE_ALL
        with open(path) as f:
            assert f.read() == default_quoted

    dollar_quoted = "$$,$col$\n$0$,$1$\n$1$,$2$\n"
    with tm.ensure_clean("test.csv") as path:
        df.to_csv(path, quoting=1, quotechar="$")
        with open(path) as f:
            assert f.read() == dollar_quoted

    with tm.ensure_clean("test.csv") as path:
        with pytest.raises(TypeError, match="quotechar"):
            df.to_csv(path, quoting=1, quotechar=None)
def test_to_csv_doublequote(self):
    """Embedded quotes are doubled; disabling that without an escapechar errors."""
    from _csv import Error

    df = DataFrame({"col": ['a"a', '"bb"']})
    doubled = '"","col"\n"0","a""a"\n"1","""bb"""\n'

    with tm.ensure_clean("test.csv") as path:
        df.to_csv(path, quoting=1, doublequote=True)  # QUOTE_ALL
        with open(path) as f:
            assert f.read() == doubled

    with tm.ensure_clean("test.csv") as path:
        with pytest.raises(Error, match="escapechar"):
            df.to_csv(path, doublequote=False)  # no escapechar set
def test_to_csv_escapechar(self):
df = DataFrame({"col": ['a"a', '"bb"']})
expected = """\
"","col"
"0","a\\"a"
"1","\\"bb\\""
"""
with tm.ensure_clean("test.csv") as path: # QUOTE_ALL
df.to_csv(path, quoting=1, doublequote=False, escapechar="\\")
with open(path) as f:
assert f.read() == expected
df = DataFrame({"col": ["a,a", ",bb,"]})
expected = """\
,col
0,a\\,a
1,\\,bb\\,
"""
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=3, escapechar="\\") # QUOTE_NONE
with open(path) as f:
assert f.read() == expected
def test_csv_to_string(self):
df = DataFrame({"col": [1, 2]})
expected_rows = [",col", "0,1", "1,2"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv() == expected
def test_to_csv_decimal(self):
# see gh-781
df = DataFrame({"col1": [1], "col2": ["a"], "col3": [10.1]})
expected_rows = [",col1,col2,col3", "0,1,a,10.1"]
expected_default = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv() == expected_default
expected_rows = [";col1;col2;col3", "0;1;a;10,1"]
expected_european_excel = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(decimal=",", sep=";") == expected_european_excel
expected_rows = [",col1,col2,col3", "0,1,a,10.10"]
expected_float_format_default = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(float_format="%.2f") == expected_float_format_default
expected_rows = [";col1;col2;col3", "0;1;a;10,10"]
expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows)
assert (
df.to_csv(decimal=",", sep=";", float_format="%.2f")
== expected_float_format
)
# see gh-11553: testing if decimal is taken into account for '0.0'
df = DataFrame({"a": [0, 1.1], "b": [2.2, 3.3], "c": 1})
expected_rows = ["a,b,c", "0^0,2^2,1", "1^1,3^3,1"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(index=False, decimal="^") == expected
# same but for an index
assert df.set_index("a").to_csv(decimal="^") == expected
# same for a multi-index
assert df.set_index(["a", "b"]).to_csv(decimal="^") == expected
def test_to_csv_float_format(self):
# testing if float_format is taken into account for the index
# GH 11553
df = DataFrame({"a": [0, 1], "b": [2.2, 3.3], "c": 1})
expected_rows = ["a,b,c", "0,2.20,1", "1,3.30,1"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(float_format="%.2f") == expected
# same for a multi-index
assert df.set_index(["a", "b"]).to_csv(float_format="%.2f") == expected
def test_to_csv_na_rep(self):
# see gh-11553
#
# Testing if NaN values are correctly represented in the index.
df = DataFrame({"a": [0, np.NaN], "b": [0, 1], "c": [2, 3]})
expected_rows = ["a,b,c", "0.0,0,2", "_,1,3"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(na_rep="_") == expected
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
# now with an index containing only NaNs
df = DataFrame({"a": np.NaN, "b": [0, 1], "c": [2, 3]})
expected_rows = ["a,b,c", "_,0,2", "_,1,3"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(na_rep="_") == expected
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
# check if na_rep parameter does not break anything when no NaN
df = DataFrame({"a": 0, "b": [0, 1], "c": [2, 3]})
expected_rows = ["a,b,c", "0,0,2", "0,1,3"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(na_rep="_") == expected
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
csv = pd.Series(["a", pd.NA, "c"]).to_csv(na_rep="ZZZZZ")
expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"])
assert expected == csv
def test_to_csv_na_rep_nullable_string(self, nullable_string_dtype):
# GH 29975
# Make sure full na_rep shows up when a dtype is provided
expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"])
csv = pd.Series(["a", pd.NA, "c"], dtype=nullable_string_dtype).to_csv(
na_rep="ZZZZZ"
)
assert expected == csv
def test_to_csv_date_format(self):
# GH 10209
df_sec = DataFrame({"A": pd.date_range("20130101", periods=5, freq="s")})
df_day = DataFrame({"A": pd.date_range("20130101", periods=5, freq="d")})
expected_rows = [
",A",
"0,2013-01-01 00:00:00",
"1,2013-01-01 00:00:01",
"2,2013-01-01 00:00:02",
"3,2013-01-01 00:00:03",
"4,2013-01-01 00:00:04",
]
expected_default_sec = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_sec.to_csv() == expected_default_sec
expected_rows = [
",A",
"0,2013-01-01 00:00:00",
"1,2013-01-02 00:00:00",
"2,2013-01-03 00:00:00",
"3,2013-01-04 00:00:00",
"4,2013-01-05 00:00:00",
]
expected_ymdhms_day = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_day.to_csv(date_format="%Y-%m-%d %H:%M:%S") == expected_ymdhms_day
expected_rows = [
",A",
"0,2013-01-01",
"1,2013-01-01",
"2,2013-01-01",
"3,2013-01-01",
"4,2013-01-01",
]
expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_sec.to_csv(date_format="%Y-%m-%d") == expected_ymd_sec
expected_rows = [
",A",
"0,2013-01-01",
"1,2013-01-02",
"2,2013-01-03",
"3,2013-01-04",
"4,2013-01-05",
]
expected_default_day = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_day.to_csv() == expected_default_day
assert df_day.to_csv(date_format="%Y-%m-%d") == expected_default_day
# see gh-7791
#
# Testing if date_format parameter is taken into account
# for multi-indexed DataFrames.
df_sec["B"] = 0
df_sec["C"] = 1
expected_rows = ["A,B,C", "2013-01-01,0,1.0"]
expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
df_sec_grouped = df_sec.groupby([pd.Grouper(key="A", freq="1h"), "B"])
assert df_sec_grouped.mean().to_csv(date_format="%Y-%m-%d") == expected_ymd_sec
def test_to_csv_different_datetime_formats(self):
# GH#21734
df = DataFrame(
{
"date": pd.to_datetime("1970-01-01"),
"datetime": pd.date_range("1970-01-01", periods=2, freq="H"),
}
)
expected_rows = [
"date,datetime",
"1970-01-01,1970-01-01 00:00:00",
"1970-01-01,1970-01-01 01:00:00",
]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(index=False) == expected
def test_to_csv_date_format_in_categorical(self):
# GH#40754
ser = pd.Series(pd.to_datetime(["2021-03-27", pd.NaT], format="%Y-%m-%d"))
ser = ser.astype("category")
expected = tm.convert_rows_list_to_csv_str(["0", "2021-03-27", '""'])
assert ser.to_csv(index=False) == expected
ser = pd.Series(
pd.date_range(
start="2021-03-27", freq="D", periods=1, tz="Europe/Berlin"
).append(pd.DatetimeIndex([pd.NaT]))
)
ser = ser.astype("category")
assert ser.to_csv(index=False, date_format="%Y-%m-%d") == expected
def test_to_csv_multi_index(self):
# see gh-6618
df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]))
exp_rows = [",1", ",2", "0,1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ["1", "2", "1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
df = DataFrame(
[1],
columns=pd.MultiIndex.from_arrays([[1], [2]]),
index=pd.MultiIndex.from_arrays([[1], [2]]),
)
exp_rows = [",,1", ",,2", "1,2,1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ["1", "2", "1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
df = DataFrame([1], columns=pd.MultiIndex.from_arrays([["foo"], ["bar"]]))
exp_rows = [",foo", ",bar", "0,1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ["foo", "bar", "1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
@pytest.mark.parametrize(
"ind,expected",
[
(
pd.MultiIndex(levels=[[1.0]], codes=[[0]], names=["x"]),
"x,data\n1.0,1\n",
),
(
pd.MultiIndex(
levels=[[1.0], [2.0]], codes=[[0], [0]], names=["x", "y"]
),
"x,y,data\n1.0,2.0,1\n",
),
],
)
@pytest.mark.parametrize("klass", [DataFrame, pd.Series])
def test_to_csv_single_level_multi_index(self, ind, expected, klass):
# see gh-19589
result = klass(pd.Series([1], ind, name="data")).to_csv(
line_terminator="\n", header=True
)
assert result == expected
def test_to_csv_string_array_ascii(self):
# GH 10813
str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}]
df = DataFrame(str_array)
expected_ascii = """\
,names
0,"['foo', 'bar']"
1,"['baz', 'qux']"
"""
with tm.ensure_clean("str_test.csv") as path:
df.to_csv(path, encoding="ascii")
with open(path) as f:
assert f.read() == expected_ascii
def test_to_csv_string_array_utf8(self):
# GH 10813
str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}]
df = DataFrame(str_array)
expected_utf8 = """\
,names
0,"['foo', 'bar']"
1,"['baz', 'qux']"
"""
with tm.ensure_clean("unicode_test.csv") as path:
df.to_csv(path, encoding="utf-8")
with open(path) as f:
assert f.read() == expected_utf8
def test_to_csv_string_with_lf(self):
# GH 20353
data = {"int": [1, 2, 3], "str_lf": ["abc", "d\nef", "g\nh\n\ni"]}
df = DataFrame(data)
with tm.ensure_clean("lf_test.csv") as path:
# case 1: The default line terminator(=os.linesep)(PR 21406)
os_linesep = os.linesep.encode("utf-8")
expected_noarg = (
b"int,str_lf"
+ os_linesep
+ b"1,abc"
+ os_linesep
+ b'2,"d\nef"'
+ os_linesep
+ b'3,"g\nh\n\ni"'
+ os_linesep
)
df.to_csv(path, index=False)
with open(path, "rb") as f:
assert f.read() == expected_noarg
with tm.ensure_clean("lf_test.csv") as path:
# case 2: LF as line terminator
expected_lf = b'int,str_lf\n1,abc\n2,"d\nef"\n3,"g\nh\n\ni"\n'
df.to_csv(path, line_terminator="\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_lf
with tm.ensure_clean("lf_test.csv") as path:
# case 3: CRLF as line terminator
# 'line_terminator' should not change inner element
expected_crlf = b'int,str_lf\r\n1,abc\r\n2,"d\nef"\r\n3,"g\nh\n\ni"\r\n'
df.to_csv(path, line_terminator="\r\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_crlf
def test_to_csv_string_with_crlf(self):
# GH 20353
data = {"int": [1, 2, 3], "str_crlf": ["abc", "d\r\nef", "g\r\nh\r\n\r\ni"]}
df = DataFrame(data)
with tm.ensure_clean("crlf_test.csv") as path:
# case 1: The default line terminator(=os.linesep)(PR 21406)
os_linesep = os.linesep.encode("utf-8")
expected_noarg = (
b"int,str_crlf"
+ os_linesep
+ b"1,abc"
+ os_linesep
+ b'2,"d\r\nef"'
+ os_linesep
+ b'3,"g\r\nh\r\n\r\ni"'
+ os_linesep
)
df.to_csv(path, index=False)
with open(path, "rb") as f:
assert f.read() == expected_noarg
with tm.ensure_clean("crlf_test.csv") as path:
# case 2: LF as line terminator
expected_lf = b'int,str_crlf\n1,abc\n2,"d\r\nef"\n3,"g\r\nh\r\n\r\ni"\n'
df.to_csv(path, line_terminator="\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_lf
with tm.ensure_clean("crlf_test.csv") as path:
# case 3: CRLF as line terminator
# 'line_terminator' should not change inner element
expected_crlf = (
b"int,str_crlf\r\n"
b"1,abc\r\n"
b'2,"d\r\nef"\r\n'
b'3,"g\r\nh\r\n\r\ni"\r\n'
)
df.to_csv(path, line_terminator="\r\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_crlf
def test_to_csv_stdout_file(self, capsys):
    """Write a frame to ``sys.stdout`` and check the captured text (GH 21561).

    Also verifies that ``sys.stdout`` is still open afterwards.
    """
    frame = DataFrame(
        [["foo", "bar"], ["baz", "qux"]], columns=["name_1", "name_2"]
    )
    expected_ascii = tm.convert_rows_list_to_csv_str(
        [",name_1,name_2", "0,foo,bar", "1,baz,qux"]
    )

    frame.to_csv(sys.stdout, encoding="ascii")
    printed = capsys.readouterr().out

    assert printed == expected_ascii
    # stdout must remain usable after to_csv returns
    assert not sys.stdout.closed
@pytest.mark.xfail(
    compat.is_platform_windows(),
    reason=(
        "Especially in Windows, file stream should not be passed "
        "to csv writer without newline='' option. "
        "(https://docs.python.org/3.6/library/csv.html#csv.writer)"
    ),
)
def test_to_csv_write_to_open_file(self):
    """Append CSV rows to an already-open text handle (GH 21696).

    The handle is deliberately opened without ``newline=''``; the xfail
    marker above covers Windows for that reason.
    """
    df = DataFrame({"a": ["x", "y", "z"]})
    expected = """\
manual header
x
y
z
"""
    with tm.ensure_clean("test.txt") as path:
        # Pin the encoding on both sides so the round trip does not depend
        # on the platform/locale default.
        with open(path, "w", encoding="utf-8") as f:
            f.write("manual header\n")
            df.to_csv(f, header=None, index=None)
        with open(path, encoding="utf-8") as f:
            assert f.read() == expected
def test_to_csv_write_to_open_file_with_newline_py3(self):
# see gh-21696
# see gh-20353
df = DataFrame({"a": ["x", "y", "z"]})
expected_rows = ["x", "y", "z"]
expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows)
with tm.ensure_clean("test.txt") as path:
with open(path, "w", newline="") as f:
f.write("manual header\n")
df.to_csv(f, header=None, index=None)
with open(path, "rb") as f:
assert f.read() == bytes(expected, "utf-8")
@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
def test_to_csv_compression(self, compression_only, read_infer, to_infer):
# see gh-15008
compression = compression_only
# We'll complete file extension subsequently.
filename = "test."
filename += icom._compression_to_extension[compression]
df = DataFrame({"A": [1]})
to_compression = "infer" if to_infer else compression
read_compression = "infer" if read_infer else compression
with tm.ensure_clean(filename) as path:
df.to_csv(path, compression=to_compression)
result = pd.read_csv(path, index_col=0, compression=read_compression)
tm.assert_frame_equal(result, df)
def test_to_csv_compression_dict(self, compression_only):
# GH 26023
method = compression_only
df = DataFrame({"ABC": [1]})
filename = "to_csv_compress_as_dict."
extension = {
"gzip": "gz",
"zstd": "zst",
}.get(method, method)
filename += extension
with tm.ensure_clean(filename) as path:
df.to_csv(path, compression={"method": method})
read_df = pd.read_csv(path, index_col=0)
tm.assert_frame_equal(read_df, df)
def test_to_csv_compression_dict_no_method_raises(self):
    """A compression dict without a ``"method"`` key raises ValueError (GH 26023)."""
    frame = DataFrame({"ABC": [1]})
    bad_compression = {"some_option": True}
    expected_message = "must have key 'method'"

    with tm.ensure_clean("out.zip") as path, pytest.raises(
        ValueError, match=expected_message
    ):
        frame.to_csv(path, compression=bad_compression)
@pytest.mark.parametrize("compression", ["zip", "infer"])
@pytest.mark.parametrize("archive_name", ["test_to_csv.csv", "test_to_csv.zip"])
def test_to_csv_zip_arguments(self, compression, archive_name):
# GH 26023
df = DataFrame({"ABC": [1]})
with tm.ensure_clean("to_csv_archive_name.zip") as path:
df.to_csv(
path, compression={"method": compression, "archive_name": archive_name}
)
with ZipFile(path) as zp:
assert len(zp.filelist) == 1
archived_file = zp.filelist[0].filename
assert archived_file == archive_name
@pytest.mark.parametrize(
"filename,expected_arcname",
[
("archive.csv", "archive.csv"),
("archive.tsv", "archive.tsv"),
("archive.csv.zip", "archive.csv"),
("archive.tsv.zip", "archive.tsv"),
("archive.zip", "archive"),
],
)
def test_to_csv_zip_infer_name(self, filename, expected_arcname):
# GH 39465
df = DataFrame({"ABC": [1]})
with tm.ensure_clean_dir() as dir:
path = Path(dir, filename)
df.to_csv(path, compression="zip")
with ZipFile(path) as zp:
assert len(zp.filelist) == 1
archived_file = zp.filelist[0].filename
assert archived_file == expected_arcname
@pytest.mark.parametrize("df_new_type", ["Int64"])
def test_to_csv_na_rep_long_string(self, df_new_type):
# see gh-25099
df = DataFrame({"c": [float("nan")] * 3})
df = df.astype(df_new_type)
expected_rows = ["c", "mynull", "mynull", "mynull"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
result = df.to_csv(index=False, na_rep="mynull", encoding="ascii")
assert expected == result
def test_to_csv_timedelta_precision(self):
# GH 6783
s = pd.Series([1, 1]).astype("timedelta64[ns]")
buf = io.StringIO()
s.to_csv(buf)
result = buf.getvalue()
expected_rows = [
",0",
"0,0 days 00:00:00.000000001",
"1,0 days 00:00:00.000000001",
]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert result == expected
def test_na_rep_truncated(self):
# https://github.com/pandas-dev/pandas/issues/31447
result = pd.Series(range(8, 12)).to_csv(na_rep="-")
expected = tm.convert_rows_list_to_csv_str([",0", "0,8", "1,9", "2,10", "3,11"])
assert result == expected
result = pd.Series([True, False]).to_csv(na_rep="nan")
expected = tm.convert_rows_list_to_csv_str([",0", "0,True", "1,False"])
assert result == expected
result = pd.Series([1.1, 2.2]).to_csv(na_rep=".")
expected = tm.convert_rows_list_to_csv_str([",0", "0,1.1", "1,2.2"])
assert result == expected
@pytest.mark.parametrize("errors", ["surrogatepass", "ignore", "replace"])
def test_to_csv_errors(self, errors):
# GH 22610
data = ["\ud800foo"]
ser = pd.Series(data, index=pd.Index(data))
with tm.ensure_clean("test.csv") as path:
ser.to_csv(path, errors=errors)
# No use in reading back the data as it is not the same anymore
# due to the error handling
@pytest.mark.parametrize("mode", ["wb", "w"])
def test_to_csv_binary_handle(self, mode):
"""
Binary file objects should work (if 'mode' contains a 'b') or even without
it in most cases.
GH 35058 and GH 19827
"""
df = tm.makeDataFrame()
with tm.ensure_clean() as path:
with open(path, mode="w+b") as handle:
df.to_csv(handle, mode=mode)
tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
@pytest.mark.parametrize("mode", ["wb", "w"])
def test_to_csv_encoding_binary_handle(self, mode):
"""
Binary file objects should honor a specified encoding.
GH 23854 and GH 13068 with binary handles
"""
# example from GH 23854
content = "a, b, 🐟".encode("utf-8-sig")
buffer = io.BytesIO(content)
df = pd.read_csv(buffer, encoding="utf-8-sig")
buffer = io.BytesIO()
df.to_csv(buffer, mode=mode, encoding="utf-8-sig", index=False)
buffer.seek(0) # tests whether file handle wasn't closed
assert buffer.getvalue().startswith(content)
# example from GH 13068
with tm.ensure_clean() as path:
with open(path, "w+b") as handle:
DataFrame().to_csv(handle, mode=mode, encoding="utf-8-sig")
handle.seek(0)
assert handle.read().startswith(b'\xef\xbb\xbf""')
def test_to_csv_iterative_compression_name(compression):
# GH 38714
df = tm.makeDataFrame()
with tm.ensure_clean() as path:
df.to_csv(path, compression=compression, chunksize=1)
tm.assert_frame_equal(
pd.read_csv(path, compression=compression, index_col=0), df
)
def test_to_csv_iterative_compression_buffer(compression):
# GH 38714
df = tm.makeDataFrame()
with io.BytesIO() as buffer:
df.to_csv(buffer, compression=compression, chunksize=1)
buffer.seek(0)
tm.assert_frame_equal(
pd.read_csv(buffer, compression=compression, index_col=0), df
)
assert not buffer.closed

View File

@@ -0,0 +1,337 @@
"""Tests formatting as writer-agnostic ExcelCells
ExcelFormatter is tested implicitly in pandas/tests/io/excel
"""
import string
import pytest
import pandas.util._test_decorators as td
import pandas._testing as tm
from pandas.io.formats.css import CSSWarning
from pandas.io.formats.excel import CSSToExcelConverter
@pytest.mark.parametrize(
"css,expected",
[
# FONT
# - name
("font-family: foo,bar", {"font": {"name": "foo"}}),
('font-family: "foo bar",baz', {"font": {"name": "foo bar"}}),
("font-family: foo,\nbar", {"font": {"name": "foo"}}),
("font-family: foo, bar, baz", {"font": {"name": "foo"}}),
("font-family: bar, foo", {"font": {"name": "bar"}}),
("font-family: 'foo bar', baz", {"font": {"name": "foo bar"}}),
("font-family: 'foo \\'bar', baz", {"font": {"name": "foo 'bar"}}),
('font-family: "foo \\"bar", baz', {"font": {"name": 'foo "bar'}}),
('font-family: "foo ,bar", baz', {"font": {"name": "foo ,bar"}}),
# - family
("font-family: serif", {"font": {"name": "serif", "family": 1}}),
("font-family: Serif", {"font": {"name": "serif", "family": 1}}),
("font-family: roman, serif", {"font": {"name": "roman", "family": 1}}),
("font-family: roman, sans-serif", {"font": {"name": "roman", "family": 2}}),
("font-family: roman, sans serif", {"font": {"name": "roman"}}),
("font-family: roman, sansserif", {"font": {"name": "roman"}}),
("font-family: roman, cursive", {"font": {"name": "roman", "family": 4}}),
("font-family: roman, fantasy", {"font": {"name": "roman", "family": 5}}),
# - size
("font-size: 1em", {"font": {"size": 12}}),
("font-size: xx-small", {"font": {"size": 6}}),
("font-size: x-small", {"font": {"size": 7.5}}),
("font-size: small", {"font": {"size": 9.6}}),
("font-size: medium", {"font": {"size": 12}}),
("font-size: large", {"font": {"size": 13.5}}),
("font-size: x-large", {"font": {"size": 18}}),
("font-size: xx-large", {"font": {"size": 24}}),
("font-size: 50%", {"font": {"size": 6}}),
# - bold
("font-weight: 100", {"font": {"bold": False}}),
("font-weight: 200", {"font": {"bold": False}}),
("font-weight: 300", {"font": {"bold": False}}),
("font-weight: 400", {"font": {"bold": False}}),
("font-weight: normal", {"font": {"bold": False}}),
("font-weight: lighter", {"font": {"bold": False}}),
("font-weight: bold", {"font": {"bold": True}}),
("font-weight: bolder", {"font": {"bold": True}}),
("font-weight: 700", {"font": {"bold": True}}),
("font-weight: 800", {"font": {"bold": True}}),
("font-weight: 900", {"font": {"bold": True}}),
# - italic
("font-style: italic", {"font": {"italic": True}}),
("font-style: oblique", {"font": {"italic": True}}),
# - underline
("text-decoration: underline", {"font": {"underline": "single"}}),
("text-decoration: overline", {}),
("text-decoration: none", {}),
# - strike
("text-decoration: line-through", {"font": {"strike": True}}),
(
"text-decoration: underline line-through",
{"font": {"strike": True, "underline": "single"}},
),
(
"text-decoration: underline; text-decoration: line-through",
{"font": {"strike": True}},
),
# - color
("color: red", {"font": {"color": "FF0000"}}),
("color: #ff0000", {"font": {"color": "FF0000"}}),
("color: #f0a", {"font": {"color": "FF00AA"}}),
# - shadow
("text-shadow: none", {"font": {"shadow": False}}),
("text-shadow: 0px -0em 0px #CCC", {"font": {"shadow": False}}),
("text-shadow: 0px -0em 0px #999", {"font": {"shadow": False}}),
("text-shadow: 0px -0em 0px", {"font": {"shadow": False}}),
("text-shadow: 2px -0em 0px #CCC", {"font": {"shadow": True}}),
("text-shadow: 0px -2em 0px #CCC", {"font": {"shadow": True}}),
("text-shadow: 0px -0em 2px #CCC", {"font": {"shadow": True}}),
("text-shadow: 0px -0em 2px", {"font": {"shadow": True}}),
("text-shadow: 0px -2em", {"font": {"shadow": True}}),
# FILL
# - color, fillType
(
"background-color: red",
{"fill": {"fgColor": "FF0000", "patternType": "solid"}},
),
(
"background-color: #ff0000",
{"fill": {"fgColor": "FF0000", "patternType": "solid"}},
),
(
"background-color: #f0a",
{"fill": {"fgColor": "FF00AA", "patternType": "solid"}},
),
# BORDER
# - style
(
"border-style: solid",
{
"border": {
"top": {"style": "medium"},
"bottom": {"style": "medium"},
"left": {"style": "medium"},
"right": {"style": "medium"},
}
},
),
(
"border-style: solid; border-width: thin",
{
"border": {
"top": {"style": "thin"},
"bottom": {"style": "thin"},
"left": {"style": "thin"},
"right": {"style": "thin"},
}
},
),
(
"border-top-style: solid; border-top-width: thin",
{"border": {"top": {"style": "thin"}}},
),
(
"border-top-style: solid; border-top-width: 1pt",
{"border": {"top": {"style": "thin"}}},
),
("border-top-style: solid", {"border": {"top": {"style": "medium"}}}),
(
"border-top-style: solid; border-top-width: medium",
{"border": {"top": {"style": "medium"}}},
),
(
"border-top-style: solid; border-top-width: 2pt",
{"border": {"top": {"style": "medium"}}},
),
(
"border-top-style: solid; border-top-width: thick",
{"border": {"top": {"style": "thick"}}},
),
(
"border-top-style: solid; border-top-width: 4pt",
{"border": {"top": {"style": "thick"}}},
),
(
"border-top-style: dotted",
{"border": {"top": {"style": "mediumDashDotDot"}}},
),
(
"border-top-style: dotted; border-top-width: thin",
{"border": {"top": {"style": "dotted"}}},
),
("border-top-style: dashed", {"border": {"top": {"style": "mediumDashed"}}}),
(
"border-top-style: dashed; border-top-width: thin",
{"border": {"top": {"style": "dashed"}}},
),
("border-top-style: double", {"border": {"top": {"style": "double"}}}),
# - color
(
"border-style: solid; border-color: #0000ff",
{
"border": {
"top": {"style": "medium", "color": "0000FF"},
"right": {"style": "medium", "color": "0000FF"},
"bottom": {"style": "medium", "color": "0000FF"},
"left": {"style": "medium", "color": "0000FF"},
}
},
),
(
"border-top-style: double; border-top-color: blue",
{"border": {"top": {"style": "double", "color": "0000FF"}}},
),
(
"border-top-style: solid; border-top-color: #06c",
{"border": {"top": {"style": "medium", "color": "0066CC"}}},
),
# ALIGNMENT
# - horizontal
("text-align: center", {"alignment": {"horizontal": "center"}}),
("text-align: left", {"alignment": {"horizontal": "left"}}),
("text-align: right", {"alignment": {"horizontal": "right"}}),
("text-align: justify", {"alignment": {"horizontal": "justify"}}),
# - vertical
("vertical-align: top", {"alignment": {"vertical": "top"}}),
("vertical-align: text-top", {"alignment": {"vertical": "top"}}),
("vertical-align: middle", {"alignment": {"vertical": "center"}}),
("vertical-align: bottom", {"alignment": {"vertical": "bottom"}}),
("vertical-align: text-bottom", {"alignment": {"vertical": "bottom"}}),
# - wrap_text
("white-space: nowrap", {"alignment": {"wrap_text": False}}),
("white-space: pre", {"alignment": {"wrap_text": False}}),
("white-space: pre-line", {"alignment": {"wrap_text": False}}),
("white-space: normal", {"alignment": {"wrap_text": True}}),
# NUMBER FORMAT
("number-format: 0%", {"number_format": {"format_code": "0%"}}),
(
"number-format: 0§[Red](0)§-§@;",
{"number_format": {"format_code": "0;[red](0);-;@"}}, # GH 46152
),
],
)
def test_css_to_excel(css, expected):
convert = CSSToExcelConverter()
assert expected == convert(css)
def test_css_to_excel_multiple():
    """Convert a multi-declaration CSS block in a single call.

    The ``unused`` property has no counterpart in the expected result.
    """
    css = """
font-weight: bold;
text-decoration: underline;
color: red;
border-width: thin;
text-align: center;
vertical-align: top;
unused: something;
"""
    expected = {
        "font": {"bold": True, "underline": "single", "color": "FF0000"},
        "border": {
            side: {"style": "thin"} for side in ("top", "right", "bottom", "left")
        },
        "alignment": {"horizontal": "center", "vertical": "top"},
    }

    converter = CSSToExcelConverter()
    actual = converter(css)

    assert expected == actual
@pytest.mark.parametrize(
"css,inherited,expected",
[
("font-weight: bold", "", {"font": {"bold": True}}),
("", "font-weight: bold", {"font": {"bold": True}}),
(
"font-weight: bold",
"font-style: italic",
{"font": {"bold": True, "italic": True}},
),
("font-style: normal", "font-style: italic", {"font": {"italic": False}}),
("font-style: inherit", "", {}),
(
"font-style: normal; font-style: inherit",
"font-style: italic",
{"font": {"italic": True}},
),
],
)
def test_css_to_excel_inherited(css, inherited, expected):
convert = CSSToExcelConverter(inherited)
assert expected == convert(css)
@pytest.mark.parametrize(
"input_color,output_color",
(
list(CSSToExcelConverter.NAMED_COLORS.items())
+ [("#" + rgb, rgb) for rgb in CSSToExcelConverter.NAMED_COLORS.values()]
+ [("#F0F", "FF00FF"), ("#ABC", "AABBCC")]
),
)
def test_css_to_excel_good_colors(input_color, output_color):
# see gh-18392
css = (
f"border-top-color: {input_color}; "
f"border-right-color: {input_color}; "
f"border-bottom-color: {input_color}; "
f"border-left-color: {input_color}; "
f"background-color: {input_color}; "
f"color: {input_color}"
)
expected = {}
expected["fill"] = {"patternType": "solid", "fgColor": output_color}
expected["font"] = {"color": output_color}
expected["border"] = {
k: {"color": output_color} for k in ("top", "right", "bottom", "left")
}
with tm.assert_produces_warning(None):
convert = CSSToExcelConverter()
assert expected == convert(css)
@pytest.mark.parametrize("input_color", [None, "not-a-color"])
def test_css_to_excel_bad_colors(input_color):
# see gh-18392
css = (
f"border-top-color: {input_color}; "
f"border-right-color: {input_color}; "
f"border-bottom-color: {input_color}; "
f"border-left-color: {input_color}; "
f"background-color: {input_color}; "
f"color: {input_color}"
)
expected = {}
if input_color is not None:
expected["fill"] = {"patternType": "solid"}
with tm.assert_produces_warning(CSSWarning):
convert = CSSToExcelConverter()
assert expected == convert(css)
def tests_css_named_colors_valid():
    """Every named colour is exactly six upper-case hexadecimal digits."""
    allowed_digits = {digit.upper() for digit in string.hexdigits}
    for hex_code in CSSToExcelConverter.NAMED_COLORS.values():
        assert len(hex_code) == 6 and set(hex_code) <= allowed_digits
@td.skip_if_no_mpl
def test_css_named_colors_from_mpl_present():
    """Every matplotlib CSS4 colour exists in ``NAMED_COLORS`` with the same hex.

    The matplotlib values are compared after dropping their first character.
    """
    from matplotlib.colors import CSS4_COLORS

    known_colors = CSSToExcelConverter.NAMED_COLORS
    for color_name, mpl_hex in CSS4_COLORS.items():
        assert color_name in known_colors and known_colors[color_name] == mpl_hex[1:]

Some files were not shown because too many files have changed in this diff Show More