The simplest way to solve your problem would be:
>>> s = 'a, b, , c, , , d, , , e, , , , , , , f'
>>> s = [x for x in s if x.isalpha()]
>>> print(s)
['a', 'b', 'c', 'd', 'e', 'f']
Then, use join():
>>> ', '.join(s)
'a, b, c, d, e, f'
Or do it in one line:
>>> s = ', '.join([x for x in s if x.isalpha()])
>>> s
'a, b, c, d, e, f'
Here is another way:
>>> s = 'a, b, , c, , , d, , , e, , , , , , , f'
>>> s = s.split()  # split on whitespace
>>> s
['a,', 'b,', ',', 'c,', ',', ',', 'd,', ',', ',', 'e,', ',', ',', ',', ',', ',', ',', 'f']
>>> while ',' in s:
...     s.remove(',')
>>> s
['a,', 'b,', 'c,', 'd,', 'e,', 'f']
>>> ''.join(s)
'a,b,c,d,e,f'
First, split the string on the delimiter ",". Then use the array_remove function to remove the empty strings. Join the array back into a string. There is still a trailing ","; to remove it, use a UDF that drops the rightmost character of the string.
scala> df.show(false)
+--------------------------------------------------------------------------------------------------------------------------------------------------------+
|my_col |
+--------------------------------------------------------------------------------------------------------------------------------------------------------+
|,,,,104,,,,,,,111,,,114,,,,,,,,,,,,,,,,,131,,,,,,,,,,,,,,,,,,,,,,,,,,157,,,,,162,,,,,,,169,,,,,174,,176,,,,,,,,,,,,, |
|,,,,104,,,,,,,111,,,114,,,,,,,,,,,,,,,,,131,,,,,,,,,,,,,,,,,,,,,,,,,,157,,,,,162,,,,,,,169,,,,,174,,176,,,,,,,,,,,,, |
|,,,,104,,,,,,,111,,,114,,,,,,,,,,,,,,,,,131,,,,,,,,,,,,,,,,,,,,,,,,,,157,,,,,162,,,,,,,169,,,,,174,,176,,,,,,,,,,,,, |
|,,,,104,,,,,,,111,,,114,,,,,,,,,,,,,,,,,131,,,,,,,,,,,,,,,,,,,,,,,,,,157,,,160,,162,,,,,,,,,,,,174,,176,,,,,,,,,,,,, |
|,,,,104,,,,,,,111,,,114,,,,,,,,,,,,,,,,,131,,,,,,,,,,,,,,,,,,,,,,,,,,157,,,,,162,,,,,,,169,,,,,174,,176,,,,,,,,,,,,, |
|,,,,104,,,,,,,111,,,,,,,,,,,,,,,,,,,,131,,,,,,,,,,,,,,,,,,,,,,,,,,157,,,160,,162,,,,,,,169,,,,,174,,176,,,,,,,,,,,,, |
|,,,103,104,,,,,,,111,,,114,,,,,,,121,,,,,,,,,,131,,,,,,,,,,,,,,,,,,,,,,,,,,157,,,,,162,,,,,,,169,,,,,174,,176,,,,,,,,,,,,, |
|,101,102,,104,,,,,,,,,113,114,,,,,,,,,,,,,,,,130,131,,,,,,,,,,141,142,143,,,146,,,,150,,152,,,,,157,,,,,162,,,,,,,,,,,,174,,,,,,,,,,184,,,,, |
|,,,,104,,,,,,,,,113,,,,,,,,,,,,,,,,,,131,,,,,,,,,,141,142,143,,,146,,,,150,,,,,155,,157,,,,,162,,,,,,,169,,,,,174,,176,177,178,,,,,,,,,,, |
|,,,,104,,,,,,,111,,,114,,,,,,,,,,,,,,,,,131,,,,,,,,,,,,,,,,,,,,,,,,,,157,,,,,162,,,,,,,,,,,,174,,176,,,,,,,,,,,,, |
|,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, |
|,,102,,104,,,,,,,,,113,114,,,,,,,,,,,,,,,,130,131,,,,,,,,,,141,142,143,,,146,,,,150,,152,,,,,157,,,,,162,,,,,,,,,,,,174,,,,,,,,,,,,,,, |
|,,,,104,,,,,,,111,112,,114,,,,,,,,,,,,,,,,,131,,,,,,,,,,,,,,,,,,,,,,,,,,157,,,,,162,,,,,,,169,,,,,174,,176,,,,,,,,,,,,, |
|,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, |
|,,102,103,104,,,,,,,,,113,114,,,,,,,121,,,,,,,,,130,131,,,,,,,,,,141,142,143,,,146,,,,150,,152,,,,,157,,,160,,162,,,,,,,,,,,173,174,,176,,178,,,,,,,,,,,|
|,,,,104,,,,,,,111,,,114,,,,,,,,,,,,,,,,,131,,,,,,,,,,,,,,,,,,,,,,,,,,157,,,,,162,,,,,,,,,,,,174,,176,,,,,,,,,,,,, |
|,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, |
|,,,103,104,,,,,,,111,,,114,,,,,,,121,,,,,,,,,,131,,,,,,,,,,,,,,,,,,,,,,,,,,157,,,,,162,,,,,,,169,,,,,174,,176,,,,,,,,,,,,, |
|,,,,104,,,,,,,111,,,114,,,,,,,,,,,,,,,,,131,,,,,,,,,,,,,,,,,,,,,,,,,,157,,,,,162,,,,,,,169,,,,,174,,176,,,,,,,,,,,,, |
|,,102,,104,,,,,,,,,113,114,,,,,,,,,,,,,,,,130,131,,,,,,,,,,141,142,143,,,146,,,,150,,152,,,,,157,,,,,162,,,,,,,,,,,,174,,,,,,,,,,,,,,, |
+--------------------------------------------------------------------------------------------------------------------------------------------------------+
scala> df.select(trim(array_join(array_remove(split($"my_col", ","), ""),",")) as "my_col").show(false)
+-----------------------------------------------------------------------------------+
|my_col |
+-----------------------------------------------------------------------------------+
|104,111,114,131,157,162,169,174,176, |
|104,111,114,131,157,162,169,174,176, |
|104,111,114,131,157,162,169,174,176, |
|104,111,114,131,157,160,162,174,176, |
|104,111,114,131,157,162,169,174,176, |
|104,111,131,157,160,162,169,174,176, |
|103,104,111,114,121,131,157,162,169,174,176, |
|101,102,104,113,114,130,131,141,142,143,146,150,152,157,162,174,184, |
|104,113,131,141,142,143,146,150,155,157,162,169,174,176,177,178, |
|104,111,114,131,157,162,174,176, |
| |
|102,104,113,114,130,131,141,142,143,146,150,152,157,162,174, |
|104,111,112,114,131,157,162,169,174,176, |
| |
|102,103,104,113,114,121,130,131,141,142,143,146,150,152,157,160,162,173,174,176,178|
|104,111,114,131,157,162,174,176, |
| |
|103,104,111,114,121,131,157,162,169,174,176, |
|104,111,114,131,157,162,169,174,176, |
|102,104,113,114,130,131,141,142,143,146,150,152,157,162,174, |
+-----------------------------------------------------------------------------------+
scala> val myUdf = udf{(x:String) => if(x.endsWith(",")){x.dropRight(1)} else {x}}
myUdf: org.apache.spark.sql.expressions.UserDefinedFunction = UserDefinedFunction(<function1>,StringType,Some(List(StringType)))
scala> df.select(myUdf(trim(array_join(array_remove(split($"my_col", ","), ""),","))) as "my_col").show(false)
+-----------------------------------------------------------------------------------+
|my_col |
+-----------------------------------------------------------------------------------+
|104,111,114,131,157,162,169,174,176 |
|104,111,114,131,157,162,169,174,176 |
|104,111,114,131,157,162,169,174,176 |
|104,111,114,131,157,160,162,174,176 |
|104,111,114,131,157,162,169,174,176 |
|104,111,131,157,160,162,169,174,176 |
|103,104,111,114,121,131,157,162,169,174,176 |
|101,102,104,113,114,130,131,141,142,143,146,150,152,157,162,174,184 |
|104,113,131,141,142,143,146,150,155,157,162,169,174,176,177,178 |
|104,111,114,131,157,162,174,176 |
| |
|102,104,113,114,130,131,141,142,143,146,150,152,157,162,174 |
|104,111,112,114,131,157,162,169,174,176 |
| |
|102,103,104,113,114,121,130,131,141,142,143,146,150,152,157,160,162,173,174,176,178|
|104,111,114,131,157,162,174,176 |
| |
|103,104,111,114,121,131,157,162,169,174,176 |
|104,111,114,131,157,162,169,174,176 |
|102,104,113,114,130,131,141,142,143,146,150,152,157,162,174 |
+-----------------------------------------------------------------------------------+
How do you remove multiple commas in Python?
Use the re.sub() function to erase commas from a Python string. re.sub() substitutes substrings: it replaces every match of the pattern with the replacement argument, in this case the empty string, eliminating all commas from the string.
How do you replace multiple spaces in Python?
Use the re.sub() method to replace multiple spaces with a single space, e.g. result = re.sub(' +', ' ', my_str).
How do I remove commas from a string in Python?
Use str.replace() to remove commas from a string in Python.
Call str.replace(',', '') to replace every instance of ',' in the string with ''.