Source code for sparkless.functions.bitwise

"""
Bitwise functions for Sparkless (PySpark 3.2+).

This module provides bitwise operations on integer columns.
"""

from typing import TYPE_CHECKING, Union

if TYPE_CHECKING:
    from sparkless.functions.base import AggregateFunction

from sparkless.functions.base import Column, ColumnOperation


def _as_column(column: Union[Column, str]) -> Column:
    """Return ``column`` unchanged, or wrap it in a Column when it is a str."""
    return Column(column) if isinstance(column, str) else column


def _unary_operation(operation: str, column: Union[Column, str]) -> ColumnOperation:
    """Build a one-argument ColumnOperation named ``"<operation>(<column name>)"``."""
    column = _as_column(column)
    return ColumnOperation(column, operation, name=f"{operation}({column.name})")


def _shift_operation(
    operation: str,
    column: Union[Column, str],
    num_bits: Union[Column, str, int],
) -> ColumnOperation:
    """Build the ColumnOperation shared by the three shift functions.

    Args:
        operation: Operation identifier ("shiftleft", "shiftright" or
            "shiftrightunsigned"); also used as the prefix of the result name.
        column: Column, or column name, holding the values to shift.
        num_bits: Shift amount as a Column, a column name, or an int.

    Returns:
        ColumnOperation named ``"<operation>(<column name>, <num_bits name>)"``.
    """
    column = _as_column(column)
    if isinstance(num_bits, (str, int)):
        # An int is stringified and wrapped in a Column exactly like a column
        # name would be.
        # NOTE(review): presumably the evaluator resolves a Column named e.g.
        # "2" to the literal 2 rather than to a column called "2" -- confirm.
        num_bits = (
            Column(str(num_bits)) if isinstance(num_bits, int) else Column(num_bits)
        )
    # Fall back to the raw value when the shift argument exposes no ``name``.
    bits_label = getattr(num_bits, "name", num_bits)
    return ColumnOperation(
        column,
        operation,
        num_bits,
        name=f"{operation}({column.name}, {bits_label})",
    )


class BitwiseFunctions:
    """Collection of bitwise manipulation functions.

    Every function is a static method that only builds a description of the
    requested operation (a ColumnOperation or an AggregateFunction); nothing
    is computed here.
    """

    @staticmethod
    def bit_count(column: Union[Column, str]) -> ColumnOperation:
        """Count the number of set bits (population count).

        Args:
            column: Integer column (Column or column name).

        Returns:
            ColumnOperation representing the bit_count function.

        Example:
            >>> df.select(F.bit_count(F.col("value")))
        """
        return _unary_operation("bit_count", column)

    @staticmethod
    def bit_get(column: Union[Column, str], pos: int) -> ColumnOperation:
        """Get bit value at position.

        Args:
            column: Integer column (Column or column name).
            pos: Bit position (0-based, from right).

        Returns:
            ColumnOperation representing the bit_get function.

        Example:
            >>> df.select(F.bit_get(F.col("value"), 0))
        """
        column = _as_column(column)
        return ColumnOperation(
            column, "bit_get", pos, name=f"bit_get({column.name}, {pos})"
        )

    @staticmethod
    def getbit(column: Union[Column, str], pos: int) -> ColumnOperation:
        """Get bit value at position (alias for bit_get) (PySpark 3.5+).

        Delegates to bit_get, so the resulting operation and its name are
        those of bit_get.

        Args:
            column: Integer column (Column or column name).
            pos: Bit position (0-based, from right).

        Returns:
            ColumnOperation representing the bit_get function.

        Example:
            >>> df.select(F.getbit(F.col("value"), 0))
        """
        return BitwiseFunctions.bit_get(column, pos)

    @staticmethod
    def bitwise_not(column: Union[Column, str]) -> ColumnOperation:
        """Perform bitwise NOT operation.

        Args:
            column: Integer column (Column or column name).

        Returns:
            ColumnOperation representing the bitwise_not function.

        Example:
            >>> df.select(F.bitwise_not(F.col("value")))
        """
        return _unary_operation("bitwise_not", column)

    # Priority 2: Bitwise Aggregate Functions

    @staticmethod
    def bit_and(column: Union[Column, str]) -> "AggregateFunction":
        """Aggregate function - bitwise AND of all values (PySpark 3.5+).

        Args:
            column: Integer column (Column or column name); handed to
                AggregateFunction as given, without conversion.

        Returns:
            AggregateFunction representing the bit_and aggregate function,
            declared with a LongType result.

        Example:
            >>> df.groupBy("dept").agg(F.bit_and("flags"))
        """
        # Imported at call time; the module only imports AggregateFunction
        # under TYPE_CHECKING.
        from sparkless.functions.base import AggregateFunction
        from sparkless.spark_types import LongType

        return AggregateFunction(column, "bit_and", LongType())

    @staticmethod
    def bit_or(column: Union[Column, str]) -> "AggregateFunction":
        """Aggregate function - bitwise OR of all values (PySpark 3.5+).

        Args:
            column: Integer column (Column or column name); handed to
                AggregateFunction as given, without conversion.

        Returns:
            AggregateFunction representing the bit_or aggregate function,
            declared with a LongType result.

        Example:
            >>> df.groupBy("dept").agg(F.bit_or("flags"))
        """
        from sparkless.functions.base import AggregateFunction
        from sparkless.spark_types import LongType

        return AggregateFunction(column, "bit_or", LongType())

    @staticmethod
    def bit_xor(column: Union[Column, str]) -> "AggregateFunction":
        """Aggregate function - bitwise XOR of all values (PySpark 3.5+).

        Args:
            column: Integer column (Column or column name); handed to
                AggregateFunction as given, without conversion.

        Returns:
            AggregateFunction representing the bit_xor aggregate function,
            declared with a LongType result.

        Example:
            >>> df.groupBy("dept").agg(F.bit_xor("flags"))
        """
        from sparkless.functions.base import AggregateFunction
        from sparkless.spark_types import LongType

        return AggregateFunction(column, "bit_xor", LongType())

    # Deprecated Aliases

    @staticmethod
    def bitwiseNOT(column: Union[Column, str]) -> ColumnOperation:
        """Deprecated alias for bitwise_not (all PySpark versions).

        Emits a FutureWarning attributed to the caller, then delegates to
        bitwise_not. Use bitwise_not instead.

        Args:
            column: Integer column (Column or column name).

        Returns:
            ColumnOperation representing bitwise NOT.
        """
        import warnings

        warnings.warn(
            "bitwiseNOT is deprecated. Use bitwise_not instead.",
            FutureWarning,
            stacklevel=2,  # report the caller's line, not this wrapper
        )
        return BitwiseFunctions.bitwise_not(column)

    # Shift Functions

    @staticmethod
    def shiftleft(
        column: Union[Column, str], num_bits: Union[Column, str, int]
    ) -> ColumnOperation:
        """Bitwise left shift.

        Args:
            column: Integer column (Column or column name).
            num_bits: Number of bits to shift left. A str or int is wrapped
                in a Column (an int is stringified first).

        Returns:
            ColumnOperation representing the shiftleft function.
        """
        return _shift_operation("shiftleft", column, num_bits)

    @staticmethod
    def shiftright(
        column: Union[Column, str], num_bits: Union[Column, str, int]
    ) -> ColumnOperation:
        """Bitwise right shift (signed).

        Args:
            column: Integer column (Column or column name).
            num_bits: Number of bits to shift right. A str or int is wrapped
                in a Column (an int is stringified first).

        Returns:
            ColumnOperation representing the shiftright function.
        """
        return _shift_operation("shiftright", column, num_bits)

    @staticmethod
    def shiftrightunsigned(
        column: Union[Column, str], num_bits: Union[Column, str, int]
    ) -> ColumnOperation:
        """Bitwise unsigned right shift.

        Args:
            column: Integer column (Column or column name).
            num_bits: Number of bits to shift right. A str or int is wrapped
                in a Column (an int is stringified first).

        Returns:
            ColumnOperation representing the shiftrightunsigned function.
        """
        return _shift_operation("shiftrightunsigned", column, num_bits)

    # Deprecated camelCase aliases (PySpark 3.0-3.1)

    @staticmethod
    def shiftLeft(
        column: Union[Column, str], num_bits: Union[Column, str, int]
    ) -> ColumnOperation:
        """Deprecated alias for shiftleft (PySpark 3.0-3.1).

        Emits a FutureWarning attributed to the caller, then delegates to
        shiftleft, so the resulting operation is named "shiftleft(...)".
        Use shiftleft instead.

        Args:
            column: Integer column (Column or column name).
            num_bits: Number of bits to shift left.

        Returns:
            ColumnOperation representing the shiftleft function.
        """
        import warnings

        warnings.warn(
            "shiftLeft is deprecated. Use shiftleft instead.",
            FutureWarning,
            stacklevel=2,
        )
        return BitwiseFunctions.shiftleft(column, num_bits)

    @staticmethod
    def shiftRight(
        column: Union[Column, str], num_bits: Union[Column, str, int]
    ) -> ColumnOperation:
        """Deprecated alias for shiftright (PySpark 3.0-3.1).

        Emits a FutureWarning attributed to the caller, then delegates to
        shiftright, so the resulting operation is named "shiftright(...)".
        Use shiftright instead.

        Args:
            column: Integer column (Column or column name).
            num_bits: Number of bits to shift right.

        Returns:
            ColumnOperation representing the shiftright function.
        """
        import warnings

        warnings.warn(
            "shiftRight is deprecated. Use shiftright instead.",
            FutureWarning,
            stacklevel=2,
        )
        return BitwiseFunctions.shiftright(column, num_bits)

    @staticmethod
    def shiftRightUnsigned(
        column: Union[Column, str], num_bits: Union[Column, str, int]
    ) -> ColumnOperation:
        """Deprecated alias for shiftrightunsigned (PySpark 3.0-3.1).

        Emits a FutureWarning attributed to the caller, then delegates to
        shiftrightunsigned, so the resulting operation is named
        "shiftrightunsigned(...)". Use shiftrightunsigned instead.

        Args:
            column: Integer column (Column or column name).
            num_bits: Number of bits to shift right.

        Returns:
            ColumnOperation representing the shiftrightunsigned function.
        """
        import warnings

        warnings.warn(
            "shiftRightUnsigned is deprecated. Use shiftrightunsigned instead.",
            FutureWarning,
            stacklevel=2,
        )
        return BitwiseFunctions.shiftrightunsigned(column, num_bits)

    # Bitmap Functions (PySpark 3.5+)

    @staticmethod
    def bitmap_bit_position(column: Union[Column, str]) -> ColumnOperation:
        """Get the bit position in a bitmap (PySpark 3.5+).

        Args:
            column: Input column (Column or column name).

        Returns:
            ColumnOperation representing the bitmap_bit_position function.

        Example:
            >>> df.select(F.bitmap_bit_position(F.col("bitmap")))
        """
        return _unary_operation("bitmap_bit_position", column)

    @staticmethod
    def bitmap_bucket_number(column: Union[Column, str]) -> ColumnOperation:
        """Get the bucket number in a bitmap (PySpark 3.5+).

        Args:
            column: Input column (Column or column name).

        Returns:
            ColumnOperation representing the bitmap_bucket_number function.

        Example:
            >>> df.select(F.bitmap_bucket_number(F.col("bitmap")))
        """
        return _unary_operation("bitmap_bucket_number", column)

    @staticmethod
    def bitmap_construct_agg(column: Union[Column, str]) -> "AggregateFunction":
        """Aggregate function - construct bitmap from values (PySpark 3.5+).

        Args:
            column: Integer column to construct bitmap from (Column or column
                name); handed to AggregateFunction as given.

        Returns:
            AggregateFunction representing the bitmap_construct_agg function,
            declared with a BinaryType result.

        Example:
            >>> df.groupBy("dept").agg(F.bitmap_construct_agg("id"))
        """
        from sparkless.functions.base import AggregateFunction
        from sparkless.spark_types import BinaryType

        return AggregateFunction(column, "bitmap_construct_agg", BinaryType())

    @staticmethod
    def bitmap_count(column: Union[Column, str]) -> ColumnOperation:
        """Count the number of set bits in a bitmap (PySpark 3.5+).

        Args:
            column: Bitmap column (Column or column name).

        Returns:
            ColumnOperation representing the bitmap_count function.

        Example:
            >>> df.select(F.bitmap_count(F.col("bitmap")))
        """
        return _unary_operation("bitmap_count", column)

    @staticmethod
    def bitmap_or_agg(column: Union[Column, str]) -> "AggregateFunction":
        """Aggregate function - bitwise OR of bitmaps (PySpark 3.5+).

        Args:
            column: Bitmap column (Column or column name); handed to
                AggregateFunction as given.

        Returns:
            AggregateFunction representing the bitmap_or_agg function,
            declared with a BinaryType result.

        Example:
            >>> df.groupBy("dept").agg(F.bitmap_or_agg("bitmap"))
        """
        from sparkless.functions.base import AggregateFunction
        from sparkless.spark_types import BinaryType

        return AggregateFunction(column, "bitmap_or_agg", BinaryType())