Python format number with commas

I'm using python 2.5 so I don't have access to the built-in formatting.

I looked at the Django code intcomma (intcomma_recurs in code below) and realized it's inefficient, because it's recursive and also compiling the regex on every run is not a good thing either. This is not necessary an 'issue' as django isn't really THAT focused on this kind of low-level performance. Also, I was expecting a factor of 10 difference in performance, but it's only 3 times slower.

Out of curiosity I implemented a few versions of intcomma to see what the performance advantages are when using regex. My test data concludes a slight advantage for this task, but surprisingly not much at all.

I also was pleased to see what I suspected: using the reverse xrange approach is unnecessary in the no-regex case, but it does make the code look slightly better at the cost of ~10% performance.

Also, I assume what you're passing in is a string and looks somewhat like a number. Results undetermined otherwise.

from __future__ import with_statement
from contextlib import contextmanager
import re,time

re_first_num = re.compile(r"\d")
def intcomma_noregex(value):
    end_offset, start_digit, period = len(value),re_first_num.search(value).start(),value.rfind('.')
    if period == -1:
        period=end_offset
    segments,_from_index,leftover = [],0,(period-start_digit) % 3
    for _index in xrange(start_digit+3 if not leftover else start_digit+leftover,period,3):
        segments.append(value[_from_index:_index])
        _from_index=_index
    if not segments:
        return value
    segments.append(value[_from_index:])
    return ','.join(segments)

def intcomma_noregex_reversed(value):
    end_offset, start_digit, period = len(value),re_first_num.search(value).start(),value.rfind('.')
    if period == -1:
        period=end_offset
    _from_index,segments = end_offset,[]
    for _index in xrange(period-3,start_digit,-3):
        segments.append(value[_index:_from_index])
        _from_index=_index
    if not segments:
        return value
    segments.append(value[:_from_index])
    return ','.join(reversed(segments))

re_3digits = re.compile(r'(?<=\d)\d{3}(?!\d)')
def intcomma(value):
    segments,last_endoffset=[],len(value)
    while last_endoffset > 3:
        digit_group = re_3digits.search(value,0,last_endoffset)
        if not digit_group:
            break
        segments.append(value[digit_group.start():last_endoffset])
        last_endoffset=digit_group.start()
    if not segments:
        return value
    if last_endoffset:
        segments.append(value[:last_endoffset])
    return ','.join(reversed(segments))

def intcomma_recurs(value):
    """
    Converts an integer to a string containing commas every three digits.
    For example, 3000 becomes '3,000' and 45000 becomes '45,000'.
    """
    new = re.sub("^(-?\d+)(\d{3})", '\g<1>,\g<2>', str(value))
    if value == new:
        return new
    else:
        return intcomma(new)

@contextmanager
def timed(save_time_func):
    begin=time.time()
    try:
        yield
    finally:
        save_time_func(time.time()-begin)

def testset_xsimple(func):
    func('5')

def testset_simple(func):
    func('567')

def testset_onecomma(func):
    func('567890')

def testset_complex(func):
    func('-1234567.024')

def testset_average(func):
    func('-1234567.024')
    func('567')
    func('5674')

if __name__ == '__main__':
    print 'Test results:'
    for test_data in ('5','567','1234','1234.56','-253892.045'):
        for func in (intcomma,intcomma_noregex,intcomma_noregex_reversed,intcomma_recurs):
            print func.__name__,test_data,func(test_data)
    times=[]
    def overhead(x):
        pass
    for test_run in xrange(1,4):
        for func in (intcomma,intcomma_noregex,intcomma_noregex_reversed,intcomma_recurs,overhead):
            for testset in (testset_xsimple,testset_simple,testset_onecomma,testset_complex,testset_average):
                for x in xrange(1000): # prime the test
                    testset(func)
                with timed(lambda x:times.append(((test_run,func,testset),x))):
                    for x in xrange(50000):
                        testset(func)
    for (test_run,func,testset),_delta in times:
        print test_run,func.__name__,testset.__name__,_delta

And here are the test results:

intcomma 5 5
intcomma_noregex 5 5
intcomma_noregex_reversed 5 5
intcomma_recurs 5 5
intcomma 567 567
intcomma_noregex 567 567
intcomma_noregex_reversed 567 567
intcomma_recurs 567 567
intcomma 1234 1,234
intcomma_noregex 1234 1,234
intcomma_noregex_reversed 1234 1,234
intcomma_recurs 1234 1,234
intcomma 1234.56 1,234.56
intcomma_noregex 1234.56 1,234.56
intcomma_noregex_reversed 1234.56 1,234.56
intcomma_recurs 1234.56 1,234.56
intcomma -253892.045 -253,892.045
intcomma_noregex -253892.045 -253,892.045
intcomma_noregex_reversed -253892.045 -253,892.045
intcomma_recurs -253892.045 -253,892.045
1 intcomma testset_xsimple 0.0410001277924
1 intcomma testset_simple 0.0369999408722
1 intcomma testset_onecomma 0.213000059128
1 intcomma testset_complex 0.296000003815
1 intcomma testset_average 0.503000020981
1 intcomma_noregex testset_xsimple 0.134000062943
1 intcomma_noregex testset_simple 0.134999990463
1 intcomma_noregex testset_onecomma 0.190999984741
1 intcomma_noregex testset_complex 0.209000110626
1 intcomma_noregex testset_average 0.513000011444
1 intcomma_noregex_reversed testset_xsimple 0.124000072479
1 intcomma_noregex_reversed testset_simple 0.12700009346
1 intcomma_noregex_reversed testset_onecomma 0.230000019073
1 intcomma_noregex_reversed testset_complex 0.236999988556
1 intcomma_noregex_reversed testset_average 0.56299996376
1 intcomma_recurs testset_xsimple 0.348000049591
1 intcomma_recurs testset_simple 0.34600019455
1 intcomma_recurs testset_onecomma 0.625
1 intcomma_recurs testset_complex 0.773999929428
1 intcomma_recurs testset_average 1.6890001297
1 overhead testset_xsimple 0.0179998874664
1 overhead testset_simple 0.0190000534058
1 overhead testset_onecomma 0.0190000534058
1 overhead testset_complex 0.0190000534058
1 overhead testset_average 0.0309998989105
2 intcomma testset_xsimple 0.0360000133514
2 intcomma testset_simple 0.0369999408722
2 intcomma testset_onecomma 0.207999944687
2 intcomma testset_complex 0.302000045776
2 intcomma testset_average 0.523000001907
2 intcomma_noregex testset_xsimple 0.139999866486
2 intcomma_noregex testset_simple 0.141000032425
2 intcomma_noregex testset_onecomma 0.203999996185
2 intcomma_noregex testset_complex 0.200999975204
2 intcomma_noregex testset_average 0.523000001907
2 intcomma_noregex_reversed testset_xsimple 0.130000114441
2 intcomma_noregex_reversed testset_simple 0.129999876022
2 intcomma_noregex_reversed testset_onecomma 0.236000061035
2 intcomma_noregex_reversed testset_complex 0.241999864578
2 intcomma_noregex_reversed testset_average 0.582999944687
2 intcomma_recurs testset_xsimple 0.351000070572
2 intcomma_recurs testset_simple 0.352999925613
2 intcomma_recurs testset_onecomma 0.648999929428
2 intcomma_recurs testset_complex 0.808000087738
2 intcomma_recurs testset_average 1.81900000572
2 overhead testset_xsimple 0.0189998149872
2 overhead testset_simple 0.0189998149872
2 overhead testset_onecomma 0.0190000534058
2 overhead testset_complex 0.0179998874664
2 overhead testset_average 0.0299999713898
3 intcomma testset_xsimple 0.0360000133514
3 intcomma testset_simple 0.0360000133514
3 intcomma testset_onecomma 0.210000038147
3 intcomma testset_complex 0.305999994278
3 intcomma testset_average 0.493000030518
3 intcomma_noregex testset_xsimple 0.131999969482
3 intcomma_noregex testset_simple 0.136000156403
3 intcomma_noregex testset_onecomma 0.192999839783
3 intcomma_noregex testset_complex 0.202000141144
3 intcomma_noregex testset_average 0.509999990463
3 intcomma_noregex_reversed testset_xsimple 0.125999927521
3 intcomma_noregex_reversed testset_simple 0.126999855042
3 intcomma_noregex_reversed testset_onecomma 0.235999822617
3 intcomma_noregex_reversed testset_complex 0.243000030518
3 intcomma_noregex_reversed testset_average 0.56200003624
3 intcomma_recurs testset_xsimple 0.337000131607
3 intcomma_recurs testset_simple 0.342000007629
3 intcomma_recurs testset_onecomma 0.609999895096
3 intcomma_recurs testset_complex 0.75
3 intcomma_recurs testset_average 1.68300008774
3 overhead testset_xsimple 0.0189998149872
3 overhead testset_simple 0.018000125885
3 overhead testset_onecomma 0.018000125885
3 overhead testset_complex 0.0179998874664
3 overhead testset_average 0.0299999713898

How do you print numbers with commas as thousands separators?

we can use my_string = '{:,. 2f}'. format(my_number) to convert float value into commas as thousands separators.

What is .2f in Python?

2f is a placeholder for floating point number. So %d is replaced by the first value of the tuple i.e 12 and %. 2f is replaced by second value i.e 150.87612 .

How do you add a comma to a number in a Dataframe in Python?

We use the python string format syntax '{:,. 0f}'. format to add the thousand comma separators to the numbers. Then we use python's map() function to iterate and apply the formatting to all the rows in the 'Median Sales Price' column.

How do you use thousand separator in Python?

Using the modern f-strings is, in my opinion, the most Pythonic solution to add commas as thousand-separators for all Python versions above 3.6: f'{1000000:,}' . The inner part within the curly brackets :, says to format the number and use commas as thousand separators.