Differential Privacy (DP) is a property of a secure aggregation mechanism that ensures that no single input measurement significantly impacts the distribution of the aggregate result. This is a stronger property than what systems like the Distributed Aggregation Protocol (DAP) are designed to achieve. This document describes a variety of DP mechanisms applicable to DAP, and, for a variety of common use cases, lifts DAP to a protocol that also achieves DP.¶
This note is to be removed before publishing as an RFC.¶
The latest revision of this draft can be found at https://wangshan.github.io/draft-wang-ppm-differential-privacy/draft-wang-ppm-differential-privacy.html. Status information for this document may be found at https://datatracker.ietf.org/doc/draft-wang-ppm-differential-privacy/.¶
Discussion of this document takes place on the Privacy Preserving Measurement Working Group mailing list (mailto:ppm@ietf.org), which is archived at https://mailarchive.ietf.org/arch/browse/ppm/. Subscribe at https://www.ietf.org/mailman/listinfo/ppm/.¶
Source for this draft and an issue tracker can be found at https://github.com/wangshan/draft-wang-ppm-differential-privacy.¶
This Internet-Draft is submitted in full conformance with the provisions of BCP 78 and BCP 79.¶
Internet-Drafts are working documents of the Internet Engineering Task Force (IETF). Note that other groups may also distribute working documents as Internet-Drafts. The list of current Internet-Drafts is at https://datatracker.ietf.org/drafts/current/.¶
Internet-Drafts are draft documents valid for a maximum of six months and may be updated, replaced, or obsoleted by other documents at any time. It is inappropriate to use Internet-Drafts as reference material or to cite them other than as "work in progress."¶
This Internet-Draft will expire on 12 December 2024.¶
Copyright (c) 2024 IETF Trust and the persons identified as the document authors. All rights reserved.¶
This document is subject to BCP 78 and the IETF Trust's Legal Provisions Relating to IETF Documents (https://trustee.ietf.org/license-info) in effect on the date of publication of this document. Please review these documents carefully, as they describe your rights and restrictions with respect to this document. Code Components extracted from this document must include Revised BSD License text as described in Section 4.e of the Trust Legal Provisions and are provided without warranty as described in the Revised BSD License.¶
[TO BE REMOVED BY RFC EDITOR: The source for this draft and the reference implementations of mechanisms and policies can be found at https://github.com/wangshan/draft-wang-ppm-differential-privacy.]¶
Multi-party computation systems like the Distributed Aggregation Protocol [DAP] enable secure aggregation of measurements generated by individuals without handling the measurements in the clear. This is made possible by using a Verifiable Distributed Aggregation Function [VDAF], the core cryptographic component of DAP. Execution of a VDAF involves: a large set of "Clients" who produce cryptographically protected measurements, called "reports"; a small number of "Aggregators" who consume reports and produce the cryptographically protected aggregate; and a "Collector" who consumes the plaintext aggregate result. Distributing the computation of the aggregate in this manner ensures that, as long as one Aggregator is honest, no attacker can learn an honest Client's measurement.¶
Depending on the application, protecting the measurements may not be sufficient for privacy, since the aggregate itself can reveal privacy-sensitive information. As an illustrative example, consider using DAP/VDAF to summarize the distribution of the heights of respondents to a survey. If one of the respondents is especially short or tall, then their contribution is likely to skew the summary statistic in a way that reveals their height. Ideally, no individual measurement would have such a significant impact on the aggregate result, but in general such leakage is inevitable for exact aggregates. Adding some carefully chosen noise to the aggregates can however help hide the contribution of one respondent.¶
This intuition can be formalized by the notion of differential privacy [DMNS06]. Differential privacy is a property of an algorithm or protocol that computes some function of a set of measurements. We say the algorithm or protocol is "differentially private", or "DP", if the probability of observing a particular output does not change significantly as a result of removing one of the measurements (or substituting it with another).¶
VDAFs are not DP on their own, but they can be composed with a variety of mechanisms that endow them with this property. All such mechanisms work by introducing noise into the computation that is carefully calibrated for a number of application-specific parameters, including the structure and number of measurements, the desired aggregation function, and the degree of "utility" required. Intuitively, a high degree of privacy can be achieved by adding a lot of noise; but adding too much noise can reduce the usefulness of the aggregate result.¶
Noise can be introduced at various steps of the computation, and by various parties. Depending on the mechanism: the Clients might add noise to their own measurements; and the Aggregators might add noise to their aggregate shares (the values they produce for the Collector).¶
In this document, we shall refer to the composition of DP mechanisms into a scheme that provides (some notion of) DP as a "DP policy". For some policies, noise is added only by the Clients or only by the Aggregators, but for others, both Clients and Aggregators may participate in generating the noise.¶
The primary goal of this document is to specify how DP policies are implemented in DAP. It does so in the following stages:¶
Section 3 describes the notion(s) of DP that are compatible with DAP. Security is defined in a few different "trust models" in which we assume that some fraction of the parties execute the protocol honestly. Of course in reality, whether such assumptions hold is usually outside of our control. Thus our goal is to design DP policies that still provide some degree of protection in more pessimistic trust models. (We call this "hedging".)¶
Section 4 specifies various mechanisms required for building DP systems, including algorithms for sampling from discrete Laplace and Gaussian distributions.¶
Section 5 defines DP policies that are implemented with DP mechanisms, their composition with VDAFs, and their execution semantics for DAP. Section 5.1 then demonstrates how to execute VDAFs with DP policies.¶
Section 6 specifies compositions of concrete VDAFs with concrete DP policies for achieving DP for specific DAP use cases.¶
The following considerations are out-of-scope for this document:¶
DP policies have a few parameters that need to be tuned in order to meet the privacy/utility tradeoff required by the application. This document provides no guidance for this process.¶
This document describes a particular class of narrowly-scoped DP policies. Other, more sophisticated policies are possible. [TODO: Add citations. Here we're thinking of things like DPrio, which may be more appropriate to specify as DAP report extensions.]¶
The mechanisms described in Section 4 are intended for use beyond DAP/VDAF. However, this document does not describe general-purpose DP policies; those described in Section 5 are tailored to specific VDAFs or classes of VDAFs.¶
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in BCP 14 [RFC2119] [RFC8174] when, and only when, they appear in all capitals, as shown here.¶
This document uses Python3 for describing algorithms.¶
Let exp(EPSILON) denote raising the numeric constant e to the power of EPSILON.¶
DP formalizes a property of any randomized algorithm that takes in a sequence of measurements, aggregates them, and outputs an aggregate result. Intuitively, this property guarantees that no single measurement significantly impacts the value of the aggregate result. This intuition is formalized by [DMNS06] as follows.¶
CP: The following might be too jargony for an informative RFC, but for now I think we're all just trying to agree on the definition. Once we have consensus among ourselves, we can punt this to the appendix and leave a less formal description here.¶
DP requires specifying a notion of "neighboring" datasets, which determines what information is being hidden. The most common notion for our setting would be the following:¶
We say that two batches of measurements D1 and D2 are "neighboring" if they are the same length and contain all the same measurements except one (i.e., the symmetric difference between the multisets contains two elements). We denote this definition as "replacement-DP" (or "substitution-DP"). Appendix B.2 discusses other notions of adjacency that may be appropriate in some settings.¶
Let p(S, D, r) denote the probability that randomized algorithm S, on input of measurements D, outputs aggregate result r.¶
A randomized algorithm S is called "EPSILON-DP" if for all neighboring D1 and D2 and all possible aggregate results r, the following inequality holds:¶
p(S, D1, r) <= exp(EPSILON) * p(S, D2, r)¶
In other words, the probability that neighboring inputs produce a given aggregate result differs by at most a constant factor, exp(EPSILON).¶
One can think of EPSILON as a measure of how much information about the measurements is leaked by the aggregate result: the smaller the EPSILON, the less information is leaked by S. For most DP applications, EPSILON will be a small constant, e.g. 0.1 or 0.5. See Appendix B for details.¶
This notion of EPSILON-DP is sometimes referred to as "pure-DP". The following is a relaxation of pure-DP, called "approximate-DP", from [DR14]. A randomized algorithm S is called "(EPSILON, DELTA)-DP" if for all neighboring D1 and D2 and all possible aggregate results r, the following inequality holds:¶
p(S, D1, r) <= exp(EPSILON) * p(S, D2, r) + DELTA¶
Compared to pure-DP, approximate-DP loses an additive factor of DELTA in the bound. DELTA can intuitively be understood as the probability that a piece of information is leaked (e.g. a Client measurement is leaked), so DELTA is typically taken to be polynomially small in the batch size or smaller, i.e., some value much smaller than 1 / batch_size. Allowing for a small DELTA can in many cases allow for much smaller noise compared to pure-DP mechanisms. See Section 4 for details.¶
Other variants of DP are possible; see the literature review in Appendix B for details.¶
Differential privacy noise sometimes needs to be calibrated based on the SENSITIVITY of the aggregation function used to compute the aggregate result over Client measurements. Sensitivity characterizes how much a change in a Client measurement can affect the aggregate result. In this document, we focus on the L1 and L2 sensitivity. We define them as the maximum of some distance between two aggregate results computed from any neighboring datasets:¶
L1 Sensitivity: the distance is defined as the sum of the absolute values of differences at all coordinates of the neighboring aggregate results.¶
L2 Sensitivity: the distance is defined as the square root of the sum of squares of the differences at all coordinates of the neighboring aggregate results.¶
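The following sketch is illustrative only (the helper names are not part of this document) and shows how these two distances could be computed for a pair of aggregate results derived from neighboring datasets.¶

import math

# Illustrative sketch: L1 and L2 distances between two aggregate
# results computed over neighboring datasets. The helper names are
# hypothetical.
def l1_distance(agg1: list[int], agg2: list[int]) -> float:
    # Sum of absolute differences at all coordinates.
    return sum(abs(a - b) for (a, b) in zip(agg1, agg2))

def l2_distance(agg1: list[int], agg2: list[int]) -> float:
    # Square root of the sum of squared differences.
    return math.sqrt(sum((a - b) ** 2 for (a, b) in zip(agg1, agg2)))

# Replacing one one-hot measurement with another moves a histogram
# aggregate by 2 in L1 distance and by sqrt(2) in L2 distance.
assert l1_distance([5, 7, 3], [6, 6, 3]) == 2
assert abs(l2_distance([5, 7, 3], [6, 6, 3]) - math.sqrt(2)) < 1e-12¶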
When considering whether a given DP policy is sufficient for DAP, it is not enough to consider the mechanisms used in isolation. It is also necessary to consider the process by which the policy is executed. In particular, our threat model for DAP considers an attacker that participates in the upload, aggregation, and collection protocols and may use its resources to attack the underlying cryptographic primitives (VDAF [VDAF], HPKE [RFC9180], and TLS [RFC8446]).¶
To account for such attackers, our goal for DAP is "computational-DP" as described by [MPRV09] (Definition 4, "SIM-CDP"). This formalizes the amount of information a computationally bounded attacker can glean about the measurements generated by honest Clients over the course of its attack on the protocol. We consider an attacker that controls the network and statically corrupts a subset of the parties.¶
We say the protocol is pure-DP (resp. approximate-DP) if the view of any computationally bounded attacker can be efficiently simulated by a simulator that itself is pure-DP (or approximate-DP) as defined above. (The simulator takes the measurements as input and outputs a value that should be computationally indistinguishable from the transcript of the protocol's execution in the presence of the attacker.)¶
Whether this property holds for a given DP policy depends on which parties can be trusted to execute the protocol correctly (i.e., which parties are not corrupted by the attacker). We consider three, increasingly pessimistic trust models.¶
KT(issue#28): Here we seem to be assuming corrupted = malicious. Is there any benefit to a more refined distinction (i.e. honest-but-curious vs malicious). I suspect we would always want secure against malicious, but perhaps there are settings where we are OK with security against bad behavior that is not catchable during an audit.¶
Assume that most Clients and one Aggregator are honest and that the other Aggregator (DAP involves just two Aggregators) and the Collector are controlled by the attacker. When all Clients are honest, this corresponds to the same trust model as the base DAP protocol. The degree of privacy provided (i.e., the value of EPSILON for pure-DP) for most protocols in this setting would degrade gracefully as the number of honest Clients decreases.¶
Assume that at least one Aggregator is honest. The other Aggregator, the Collector, and all but one Client are controlled by the attacker. The goal is to provide protection for the honest Client's measurement.¶
TODO(issue#15) For the simple Aggregator-noise mechanism, if the malicious Aggregator cheats by not adding noise, then the aggregate result is not DP from the point of view of the honest Aggregator, unless the honest Aggregator "forgets" the randomness it used. Describe this "attack" in "Security Considerations" and say why it's irrelevant.¶
Assume that all parties, including all but one Client, both Aggregators, and the Collector, are controlled by the attacker. The best the honest Client can hope for is that its measurement has "local-DP". This property is defined the same way as pure- or approximate-DP, but the bound on EPSILON that we would aim to achieve for local-DP would typically be larger than that in a more optimistic trust model.¶
If a trust model's assumptions turn out to be false in practice, then it is desirable for a DP policy to maintain some degree of privacy in a more pessimistic trust model. For example, a deployment of DAP might provide some mechanism to ensure that all reports that are consumed were generated by trusted Clients (e.g., a Trusted Execution Environment (TEE) at each Client). This gives us confidence that our assumptions in the OAMC trust model hold. But if this mechanism is broken (e.g., a flaw is found in the TEE), then it is desirable if the policy remains DP in the OAOC or OC model, perhaps with a weaker bound.¶
This section describes various mechanisms required for implementing DP policies. The algorithms are designed to securely expand a short, uniform random seed into a sample from the given target distribution.¶
TODO For now we don't actually expand a seed into a sample; we just use coin flips that are local to the relevant algorithm. Update the API so that it takes a random seed instead, so that we can derive test vectors.¶
Each mechanism has internal parameters that determine how much noise will be added to its input data. Note that a mechanism that is initialized with its internal parameters can achieve different combinations of DP parameters, e.g. (EPSILON, DELTA)-DP or (EPSILON', DELTA')-DP, where EPSILON < EPSILON' and DELTA > DELTA', because if we make EPSILON larger (i.e., weaker privacy), we may achieve a smaller DELTA and thereby a smaller overall DP bound (i.e., stronger privacy).¶
A concrete DP mechanism implements the following methods:¶
DpMechanism.add_noise(data: DataType) -> DataType adds noise to the input data (i.e., a measurement or an aggregate share). Some DP mechanisms apply noise based on the input data.¶
DpMechanism.sample_noise(dimension: int) -> DataType samples noise of length dimension with the DP mechanism. The interpretation of dimension generally depends on the data type. It may be called by DpMechanism.add_noise().¶
DpMechanism.debias(data: DataType, meas_count: int) -> DebiasedDataType debiases the noised data based on the number of measurements meas_count. Note that not all mechanisms implement this method; some, such as the mechanism of Section 4.3, do.¶
Putting this together, a DP mechanism is a concrete subclass of DpMechanism, defined below:¶
class DpMechanism:
    # The data type applicable to this `DpMechanism`. The type is the
    # same for the noised data and the un-noised data.
    DataType = None

    # Debiased data type after removing bias added by the noise. For
    # most of the mechanisms, `DebiasedDataType == DataType`.
    DebiasedDataType = None

    def add_noise(self, data: DataType) -> DataType:
        """Add noise to a piece of input data."""
        raise NotImplementedError()

    def sample_noise(self, dimension: int) -> DataType:
        """
        Sample noise with the initialized `DpMechanism`. `dimension`
        is used to determine the length of the output if `DataType`
        is a list.
        """
        raise NotImplementedError()

    def debias(self,
               data: DataType,
               meas_count: int) -> DebiasedDataType:
        """
        Debias the data due to the added noise, based on the number
        of measurements `meas_count`. This doesn't apply to all DP
        mechanisms. Some Client randomization mechanisms need this
        functionality.
        """
        return data¶
This section describes Symmetric RAPPOR, a DP mechanism first proposed in [EPK14], and refined in Appendix C.1 of [MJTB_22]. (The specification here reflects the refined version.) It is initialized with a parameter EPSILON_0. It takes in a bit vector and outputs a noisy version with randomly flipped bits.¶
Symmetric RAPPOR applies the "binary randomized response mechanism" at each coordinate. Binary randomized response takes in a single bit x. The bit is flipped to 1 - x with probability 1 / (exp(EPSILON_0) + 1). For example, if EPSILON_0 is configured to be 3, and the input to binary randomized response is a 0, the bit will be flipped to 1 with probability 1 / (exp(3) + 1); otherwise, it will stay a 0.¶
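The following sketch is for illustration only: it evaluates the flip probability for EPSILON_0 = 3 and checks that the likelihood ratio of reporting a 1 on input 1 versus input 0 is exactly exp(EPSILON_0), i.e., binary randomized response satisfies EPSILON_0-DP under replacement.¶

import math

# Illustrative sketch: binary randomized response with EPSILON_0 = 3.
eps0 = 3.0
p_flip = 1.0 / (math.exp(eps0) + 1.0)  # probability of flipping the bit

# Probability of reporting a 1 when the true bit is 1 vs. when it
# is 0.
p_one_given_one = 1.0 - p_flip
p_one_given_zero = p_flip

# The likelihood ratio is exactly exp(EPSILON_0).
assert abs(p_one_given_one / p_one_given_zero - math.exp(eps0)) < 1e-9¶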
Under the OC trust model, by applying binary randomized response with parameter EPSILON_0 to its measurement, the Client gets EPSILON_0-DP with deletion (Definition II.4 of [EFMR_20], and Definition C.1 of [MJTB_22]). A formal definition of deletion-DP is given in Section 4.3.1.¶
Symmetric RAPPOR generalizes the binary randomized response mechanism by applying it independently at all coordinates of a Client's bit vector. Under the OAMC trust model, it is proven in Appendix C.1.3 of [MJTB_22] that strong (EPSILON, DELTA)-DP can be achieved by aggregating a batch of noisy Client measurements, each of which is a bit vector with exactly one bit set, and is noised with symmetric RAPPOR. The final aggregate result needs to be "debiased" due to the noise added by the Clients. Although the raw aggregate result is a vector of integers, the debiased aggregate result is a vector of floats, because the best estimate of the true sum depends on exp(EPSILON_0), and generally won't be an integer.¶
Since the noise generated by each Client at each coordinate is independent, as the number of Clients n grows, the noise distribution at each coordinate approximates a Gaussian distribution, with mean 0, and standard deviation sqrt(n * exp(EPSILON_0) / (exp(EPSILON_0) - 1)^2), as proved by Theorem C.2 of [MJTB_22].¶
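For intuition, the following sketch (illustrative only; the parameter values are chosen arbitrarily) evaluates this standard deviation for one choice of n and EPSILON_0.¶

import math

# Illustrative sketch: per-coordinate standard deviation of the
# aggregated RAPPOR noise, per Theorem C.2 of [MJTB_22].
def rappor_noise_stddev(n: int, eps0: float) -> float:
    exp_eps = math.exp(eps0)
    return math.sqrt(n * exp_eps / (exp_eps - 1.0) ** 2)

# With n = 100000 Clients and EPSILON_0 = 5.0, the standard
# deviation at each coordinate is roughly 26.1.
print(rappor_noise_stddev(100000, 5.0))¶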
EPSILON_0-DP in Deletion-DP
JC: We only add a definition of deletion-DP here since this is likely the only mechanism that provides deletion-DP in the OC trust model. Putting it in overview might confuse readers early on, because we said only replacement-DP applies to DAP.¶
Let P1(S, D, E) denote the probability that a randomized algorithm S, on an input measurement D, outputs a noisy measurement E. Let P2(R, E) denote the probability that a noisy output from the reference distribution R is equal to E. A randomized algorithm S is said to be EPSILON_0-DP in the deletion-DP model, if there exists a reference distribution R, such that for all possible Client measurements D, and all possible noisy outputs E, we have:¶
-EPSILON_0 <= ln(P1(S, D, E) / P2(R, E)) <= EPSILON_0¶
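As a non-normative check, the sketch below evaluates this inequality for binary randomized response with EPSILON_0 = 3, taking the reference distribution R to be a fair coin. This choice of R is made only for illustration; it is not necessarily the reference distribution used in [EFMR_20] or [MJTB_22].¶

import math

# Illustrative sketch: check the deletion-DP inequality for binary
# randomized response, with the reference distribution R chosen to
# be a fair coin (an arbitrary choice for illustration).
eps0 = 3.0
p_flip = 1.0 / (math.exp(eps0) + 1.0)

def p1(x: int, e: int) -> float:
    # Probability that binary randomized response outputs `e` on
    # input bit `x`.
    return p_flip if e != x else 1.0 - p_flip

def p2(e: int) -> float:
    # Reference distribution R: a fair coin.
    return 0.5

for x in (0, 1):
    for e in (0, 1):
        log_ratio = math.log(p1(x, e) / p2(e))
        assert -eps0 <= log_ratio <= eps0¶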
Intuitively, if we think of the reference distribution R as an average, noisy Client measurement, deletion-DP makes it hard to distinguish the Client measurement D from the measurement of an average Client.¶
Symmetric RAPPOR is specified by the DP mechanism SymmetricRappor below.¶
TODO: We could make the sampler more efficient if we use binomial.¶
import math
import random

class SymmetricRappor(DpMechanism):
    DataType = list[int]

    # Debiasing produces an array of floats.
    DebiasedDataType = list[float]

    def __init__(self, eps0: float):
        self.eps0 = eps0
        self.p = 1.0 / (math.exp(eps0) + 1.0)

    def add_noise(self, data: DataType) -> DataType:
        # Apply binary randomized response at each coordinate, based
        # on Appendix C.1.1 of {{MJTB+22}}.
        return list(map(
            lambda x: 1 - x if self.coin_flip() else x,
            data
        ))

    def sample_noise(self, dimension: int) -> DataType:
        # Sample binary randomized response at each coordinate on an
        # all zero vector.
        return [int(self.coin_flip()) for coord in range(dimension)]

    def debias(self,
               data: DataType,
               meas_count: int) -> DebiasedDataType:
        # Debias the data based on Appendix C.1.2 of {{MJTB+22}}.
        exp_eps = math.exp(self.eps0)
        return list(map(
            lambda x: (x * (exp_eps + 1) / (exp_eps - 1)
                       - meas_count / (exp_eps - 1)),
            data
        ))

    def coin_flip(self):
        return random.random() < self.p¶
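The following usage sketch is illustrative only and assumes the class defined above. It noises a one-hot measurement as a Client would, then simulates a small batch and debiases the raw sum as the Collector would.¶

# Illustrative usage of `SymmetricRappor` (assumes the class above).
rappor = SymmetricRappor(eps0=5.0)

# A Client noises its one-hot measurement before sharding.
noisy = rappor.add_noise([0, 1, 0, 0])

# Simulate 1000 Clients that all hold the same measurement, sum the
# noisy reports coordinate-wise, and debias the raw sum.
meas_count = 1000
agg = [0, 0, 0, 0]
for _ in range(meas_count):
    report = rappor.add_noise([0, 1, 0, 0])
    agg = [a + r for (a, r) in zip(agg, report)]

# The debiased estimate should be close to [0, 1000, 0, 0].
print(rappor.debias(agg, meas_count))¶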
This section defines a generic interface for DP policies for VDAFs. A DP policy composes the following operations with execution of a VDAF:¶
An optional Client randomization mechanism that adds noise to Clients' measurements prior to sharding.¶
An optional Aggregator randomization mechanism that adds noise to an Aggregator's aggregate share prior to unsharding.¶
An optional debiasing step that removes the bias in DP mechanisms (i.e. DpMechanism.debias) after unsharding.¶
The composition of Client and Aggregator randomization mechanisms defines the DP policy for a VDAF, and enforces the DP guarantee. In particular, a concrete DP policy is a subclass of DpPolicy:¶
class DpPolicy:
    # Client measurement type.
    Measurement = None

    # Aggregate share type, owned by an Aggregator.
    AggShare = None

    # Aggregate result type, unsharded result from all Aggregators.
    AggResult = None

    # Debiased aggregate result type.
    DebiasedAggResult = None

    def add_noise_to_measurement(self,
                                 meas: Measurement) -> Measurement:
        """
        Add noise to measurement, if required by the Client
        randomization mechanism. The default implementation is to do
        nothing.
        """
        return meas

    def add_noise_to_agg_share(self,
                               agg_share: AggShare) -> AggShare:
        """
        Add noise to aggregate share, if required by the Aggregator
        randomization mechanism. The default implementation is to do
        nothing.
        """
        return agg_share

    def debias_agg_result(self,
                          agg_result: AggResult,
                          meas_count: int) -> DebiasedAggResult:
        """
        Debias aggregate result, if either of the Client or
        Aggregator randomization mechanism requires this operation,
        based on the number of measurements `meas_count`. The default
        implementation is to do nothing.
        """
        return agg_result¶
The execution of DpPolicy with a Vdaf can thus be summarized by the following computational steps (these are carried out by DAP in a distributed manner):¶
def run_vdaf_with_dp_policy(
        dp_policy: DpPolicy,
        Vdaf: Vdaf,
        agg_param: Vdaf.AggParam,
        measurements: list[DpPolicy.Measurement]):
    """Run the DP policy with VDAF on a list of measurements."""
    nonces = [
        gen_rand(Vdaf.NONCE_SIZE)
        for _ in range(len(measurements))
    ]
    verify_key = gen_rand(Vdaf.VERIFY_KEY_SIZE)

    out_shares = []
    for (nonce, measurement) in zip(nonces, measurements):
        assert len(nonce) == Vdaf.NONCE_SIZE

        # Each Client adds Client randomization noise to its
        # measurement.
        noisy_measurement = \
            dp_policy.add_noise_to_measurement(measurement)

        # Each Client shards its measurement into input shares.
        rand = gen_rand(Vdaf.RAND_SIZE)
        (public_share, input_shares) = \
            Vdaf.shard(noisy_measurement, nonce, rand)

        # Each Aggregator initializes its preparation state.
        prep_states = []
        outbound = []
        for j in range(Vdaf.SHARES):
            (state, share) = Vdaf.prep_init(verify_key, j,
                                            agg_param,
                                            nonce,
                                            public_share,
                                            input_shares[j])
            prep_states.append(state)
            outbound.append(share)

        # Aggregators recover their output shares.
        for i in range(Vdaf.ROUNDS-1):
            prep_msg = Vdaf.prep_shares_to_prep(agg_param, outbound)
            outbound = []
            for j in range(Vdaf.SHARES):
                out = Vdaf.prep_next(prep_states[j], prep_msg)
                (prep_states[j], out) = out
                outbound.append(out)

        # The final outputs of the prepare phase are the output
        # shares.
        prep_msg = Vdaf.prep_shares_to_prep(agg_param, outbound)
        outbound = []
        for j in range(Vdaf.SHARES):
            out_share = Vdaf.prep_next(prep_states[j], prep_msg)
            outbound.append(out_share)
        out_shares.append(outbound)

    num_measurements = len(measurements)

    # Each Aggregator aggregates its output shares into an
    # aggregate share, and adds any Aggregator randomization
    # mechanism to its aggregate share. In a distributed VDAF
    # computation, the aggregate shares are sent over the network.
    agg_shares = []
    for j in range(Vdaf.SHARES):
        out_shares_j = [out[j] for out in out_shares]
        agg_share_j = Vdaf.aggregate(agg_param, out_shares_j)
        # Each Aggregator independently adds noise to its aggregate
        # share.
        noised_agg_share_j = \
            dp_policy.add_noise_to_agg_share(agg_share_j)
        agg_shares.append(noised_agg_share_j)

    # Collector unshards the aggregate.
    agg_result = Vdaf.unshard(agg_param, agg_shares,
                              num_measurements)

    # Debias aggregate result.
    debiased_agg_result = dp_policy.debias_agg_result(
        agg_result, num_measurements
    )
    return debiased_agg_result¶
TODO: Specify integration of a DpPolicy into DAP.¶
Many applications require aggregating histograms in which each Client submits a bit vector with exactly one bit set, also known as a "one-hot vector".¶
We describe two policies that achieve (EPSILON, DELTA)-DP for this use case. The first uses only a Client randomization mechanism and targets the OAMC trust model. The second uses only an Aggregator randomization mechanism and targets the more stringent OAOC trust model. We find that the two policies, under different settings of EPSILON and DELTA, provide comparable utility, except that the policy in the OAOC trust model requires all Aggregators to independently add noise, so we lose some utility when more than one Aggregator is honest.¶
Client randomization allows Clients to protect their privacy by adding noise to their measurements directly, as described in Appendix B.1. Analyses ([FMT20] and [FMT22]) have shown that, in the OAMC trust model, we get good approximate-DP by aggregating noisy Client measurements produced with Client randomization. In this policy, we describe how to achieve approximate-DP with each Client applying symmetric RAPPOR to its measurement.¶
Our target VDAF is Prio3Histogram as specified in [VDAF]. This uses the Histogram circuit to enforce one-hotness of the measurement. Due to the noising mechanism, a less strict circuit is required that tolerates a bounded number of non-0 entries in the vector. We call this Prio3MultiHotHistogram.¶
JC: Specify Prio3MultiHotHistogram. This may end up in the base VDAF draft. In the meantime, there is a reference implementation here: https://github.com/cfrg/draft-irtf-cfrg-vdaf/blob/main/poc/vdaf_prio3.py¶
The Client randomization we will use here is the symmetric RAPPOR mechanism of Section 4.3, which is initialized with an EPSILON_0 parameter. We get (EPSILON, DELTA)-DP in the aggregate result as long as there are at least batch_size honest Clients, each of which applies symmetric RAPPOR to its measurement and contributes the noisy measurement to the batch. The (EPSILON, DELTA)-DP degrades gracefully as the number of honest Clients decreases, i.e., we can still achieve (EPSILON', DELTA)-DP, where EPSILON' is larger than EPSILON.¶
TODO(junyechen1996): Justify why RR with EPSILON_0 + batch_size can achieve (EPSILON, DELTA)-DP in the aggregate result.¶
Because applying symmetric RAPPOR to a one-hot Client measurement can cause the noisy measurement to have multiple bits set, we need to check that the noisy measurement has at most m 1s, per Section 4.5 of [TWMJ_23], to ensure robustness against malicious Clients, who attempt to bias the final histogram by setting many coordinates to 1.¶
Assume the length of the Client measurement is d, and there is exactly one bit set. For the d - 1 coordinates with 0s, the probability p_0 of changing a coordinate from 0 to 1 is 1 / (exp(EPSILON_0) + 1) per Section 4.3, so we can model the number of 1s in the noisy measurement as a binomial random variable C with number of trials d - 1, and probability p_0, plus the one bit that is already set. Our goal is to ensure the probability p of 1 + C exceeding m is small enough, i.e., the false positive rate of a noisy measurement from an honest Client having more than m bits is at most p. This is equivalent to finding m and p, such that the cumulative distribution function (CDF) satisfies Pr(C <= m - 1) >= 1 - p.¶
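One possible way to search for m is sketched below for illustration; the helper name and parameter values are hypothetical and not part of this document.¶

import math

# Illustrative sketch: find the smallest bound `m` such that an
# honest Client's noisy measurement has more than `m` bits set with
# probability at most `p`. The helper name and parameters are
# hypothetical.
def find_multi_hot_bound(d: int, eps0: float, p: float) -> int:
    p0 = 1.0 / (math.exp(eps0) + 1.0)
    # C ~ Binomial(d - 1, p0); find the smallest m with
    # Pr(C <= m - 1) >= 1 - p.
    cdf = 0.0
    for k in range(d):
        cdf += (math.comb(d - 1, k)
                * p0 ** k * (1.0 - p0) ** (d - 1 - k))
        if cdf >= 1.0 - p:
            return k + 1
    return d

# Example: a histogram of length 100, EPSILON_0 = 5.0, and a false
# positive rate of at most 2^-64.
print(find_multi_hot_bound(100, 5.0, 2.0 ** -64))¶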
Once we find m, we will use it to instantiate Prio3MultiHotHistogram to perform verification and aggregation. The final aggregate result is debiased based on the number of measurements according to Section 4.3, in order to reduce the bias introduced during Client randomization.¶
class MultiHotHistogramWithClientRandomization(DpPolicy):
    Field = MultiHotHistogram.Field
    Measurement = MultiHotHistogram.Measurement
    AggShare = list[Field]
    AggResult = MultiHotHistogram.AggResult
    DebiasedAggResult = SymmetricRappor.DebiasedDataType

    def __init__(self, eps0: float):
        # TODO(junyechen1996): Justify how eps0 + batch_size can
        # achieve `(EPSILON, DELTA)`-DP.
        self.rappor = SymmetricRappor(eps0)

    def add_noise_to_measurement(self,
                                 meas: Measurement) -> Measurement:
        return self.rappor.add_noise(meas)

    def debias_agg_result(self,
                          agg_result: AggResult,
                          meas_count: int) -> DebiasedAggResult:
        return self.rappor.debias(agg_result, meas_count)¶
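The usage sketch below is illustrative only. It assumes the class above together with the Prio3MultiHotHistogram definitions it references, which are not yet specified in this document.¶

# Illustrative usage (assumes the class above and the
# Prio3MultiHotHistogram definitions it references).
policy = MultiHotHistogramWithClientRandomization(eps0=5.0)

# Each Client noises its one-hot measurement before sharding.
noisy_measurement = policy.add_noise_to_measurement([0, 1, 0, 0])

# After unsharding, the Collector debiases the aggregate result,
# using the number of measurements in the batch.
debiased = policy.debias_agg_result([12, 980, 9, 14],
                                    meas_count=1000)¶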
As discussed in Section 4.3, as the number of Clients n increases, the noise at each coordinate of the debiased aggregate result approximates a Gaussian distribution with mean 0, and standard deviation sqrt(n * exp(EPSILON_0) / (exp(EPSILON_0) - 1)^2). We will look at the standard deviation generated by symmetric RAPPOR from n Clients, with parameter choices that achieve various combinations of (EPSILON, DELTA)-DP.¶
EPSILON | DELTA | Standard deviation | Internal Parameters |
---|---|---|---|
0.317 | 1e-9 | 26.1337 | n = 100000, EPSILON_0 = 5.0 |
0.906 | 1e-9 | 12.2800 | n = 100000, EPSILON_0 = 6.5 |
1.528 | 1e-9 | 9.5580 | n = 100000, EPSILON_0 = 7.0 |
Aggregator randomization requires Aggregators to add noise to their aggregate shares before outputting them. Under the OAOC trust model, we can achieve good EPSILON-DP, or (EPSILON, DELTA)-DP, as long as at least one of the Aggregators is honest. The amount of noise needed by the Aggregator randomizer typically depends on the target DP parameters EPSILON and DELTA, and also on the SENSITIVITY (Section 3.2) of the aggregation function.¶
In this section, we describe how to achieve (EPSILON, DELTA)-DP for Prio3Histogram (Section 7.4.4 of [VDAF]) by asking each Aggregator to independently add discrete Gaussian noise to its aggregate share.¶
We use the discrete Gaussian mechanism described in Section 4.2, which has a mean of 0, and is initialized with a SIGMA parameter that stands for the standard deviation of the Gaussian distribution. In order to achieve (EPSILON, DELTA)-DP in the OAOC trust model, all Aggregators need to independently add discrete Gaussian noise to all coordinates of their aggregate shares.¶
Theorem 8 in [BW18] shows how to compute the SIGMA parameter for the continuous Gaussian, based on the given (EPSILON, DELTA)-DP parameters and L2-sensitivity, and Theorem 7 in [CKS20] shows a similar result for the discrete Gaussian. For the current use case, the L2-sensitivity is sqrt(2), because transforming one one-hot vector into another affects two coordinates; e.g., transforming the one-hot vector [1, 0] to [0, 1] changes the aggregate by sqrt((1 - 0)^2 + (0 - 1)^2) = sqrt(2) in L2 distance. Algorithm 1 in [BW18] elaborates on how to compute SIGMA, and we will refer to the calculation in the open-source code [AGM].¶
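The sketch below is illustrative only and is not a substitute for the calculation in [AGM]. It shows one way SIGMA could be calibrated: binary-search for the smallest SIGMA satisfying the exact (EPSILON, DELTA)-DP condition for the continuous Gaussian given in Theorem 8 of [BW18]. Per the note that follows, additional care is needed for the discrete Gaussian when SIGMA is small.¶

import math

# Illustrative sketch: calibrate SIGMA for the continuous Gaussian
# mechanism via the exact condition of Theorem 8 in [BW18]. The
# reference code [AGM] should be preferred in practice.
def gaussian_cdf(x: float) -> float:
    return 0.5 * math.erfc(-x / math.sqrt(2.0))

def analytic_gaussian_delta(sigma: float, eps: float,
                            sensitivity: float) -> float:
    # Smallest DELTA for which adding N(0, sigma^2) noise to a
    # function with the given L2-sensitivity is (eps, DELTA)-DP.
    a = sensitivity / (2.0 * sigma)
    b = eps * sigma / sensitivity
    return gaussian_cdf(a - b) - math.exp(eps) * gaussian_cdf(-a - b)

def calibrate_sigma(eps: float, delta: float,
                    sensitivity: float) -> float:
    # Binary search for the smallest sigma achieving the target
    # delta.
    lo, hi = 1e-6, 1e6
    for _ in range(200):
        mid = (lo + hi) / 2.0
        if analytic_gaussian_delta(mid, eps, sensitivity) <= delta:
            hi = mid
        else:
            lo = mid
    return hi

# Example: EPSILON = 0.317, DELTA = 1e-9, L2-sensitivity sqrt(2).
print(calibrate_sigma(0.317, 1e-9, math.sqrt(2.0)))¶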
JC: We will need to provide an explanation of the parameter calculation in the draft itself, instead of merely referring to the code.¶
KT: [CKS20] mentions in Theorem 7, regarding the difference in approximate-DP achieved by the discrete Gaussian [CKS20] and the continuous Gaussian [BW18], that the discrete and continuous Gaussian attain almost identical guarantees for large SIGMA, but the discretization creates a small difference that becomes apparent for small SIGMA. Therefore, if we use Algorithm 1 from [BW18], we will need to be careful when SIGMA is small, i.e., when the desired (EPSILON, DELTA)-DP is weak.¶
JC: It's also worth exploring the utility of discrete Gaussian proven by Definition 3 in [CKS20], which defines the concentrated-DP achieved by discrete Gaussian. Concentrated-DP is then converted to approximate-DP, which is our target here. As a first draft, we won't overwhelm readers with other types of DP.¶
import math

class HistogramWithAggregatorRandomization(DpPolicy):
    Field = Histogram.Field

    # A measurement is an unsigned integer, indicating an index less
    # than `Histogram.length`.
    Measurement = Histogram.Measurement
    AggShare = list[Field]
    AggResult = Histogram.AggResult

    # The final aggregate result should be a vector of signed
    # integers, because discrete Gaussian could produce negative
    # noise that may have a larger absolute value than the count
    # before noise.
    DebiasedAggResult = list[int]

    def __init__(self, epsilon: float, delta: float):
        # TODO(junyechen1996): Consider using fixed precision or
        # large decimal for parameters like `epsilon` and `delta`.
        # (#23)
        #
        # Transforming a one-hot vector into another will affect two
        # coordinates, e.g. from [1, 0, 0] to [0, 1, 0], so the
        # change in L2-sensitivity is `sqrt(1 + 1) = sqrt(2)`.
        dgauss_sigma = agm.calibrateAnalyticGaussianMechanism(
            epsilon, delta, math.sqrt(2.0)
        )
        self.dgauss_mechanism = \
            DiscreteGaussianWithZeroMean(dgauss_sigma)

    def add_noise_to_agg_share(self,
                               agg_share: AggShare) -> AggShare:
        """
        Sample discrete Gaussian noise, and merge it with the
        aggregate share.
        """
        noise_vec = \
            self.dgauss_mechanism.sample_noise(len(agg_share))
        result = []
        for (agg_share_i, noise_vec_i) in zip(agg_share, noise_vec):
            if noise_vec_i < 0:
                noise_vec_i = self.Field.MODULUS + noise_vec_i
            result.append(agg_share_i + self.Field(noise_vec_i))
        return result

    def debias_agg_result(self,
                          agg_result: AggResult,
                          meas_count: int) -> DebiasedAggResult:
        # TODO(junyechen1996): Interpret large unsigned integers as
        # negative values or 0 properly. For now, directly return it,
        # since we haven't fully implemented the discrete Gaussian
        # mechanism (#10).
        return agg_result¶
We will demonstrate the utility of this policy in the table below in terms of the standard deviation SIGMA of the discrete Gaussian needed to achieve various combinations of (EPSILON, DELTA)-DP, based on the open-source code in [AGM].¶
It's worth noting that if more than one Aggregator is honest, we will lose some utility because each Aggregator is independently adding noise. The standard deviation in the aggregate result thus becomes SIGMA * sqrt(c), where c is the number of honest Aggregators. In the table below, the numbers in the "Standard deviation" column assume c = 2. The numbers in the "Standard deviation (OAOC)" column assume only one Aggregator is honest.¶
EPSILON | DELTA | Standard deviation | Standard deviation (OAOC) |
---|---|---|---|
0.317 | 1e-9 | 33.0788 | 23.3903 |
0.906 | 1e-9 | 12.0777 | 8.5402 |
1.528 | 1e-9 | 7.3403 | 5.1904 |
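As a quick check of the relationship between the two columns (illustrative only), the combined noise of c independent Gaussians, each with standard deviation SIGMA, has standard deviation SIGMA * sqrt(c).¶

import math

# Illustrative sketch: combined standard deviation when `c` honest
# Aggregators each add independent Gaussian noise with standard
# deviation SIGMA.
def combined_stddev(sigma: float, c: int) -> float:
    return sigma * math.sqrt(c)

# First row of the table: a per-Aggregator SIGMA of about 23.3903
# (the OAOC column) yields about 33.08 when both Aggregators are
# honest (c = 2).
print(combined_stddev(23.3903, 2))¶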
TODO Security¶
This document has no IANA actions.¶
Pierre Tholoniat, Columbia University, pierre@cs.columbia.edu¶
Differential privacy is a set of techniques used to protect the privacy of individuals when analyzing users' data. It provides a mathematical framework that ensures the analysis of a dataset does not reveal identifiable information about any specific individual. The advantage of differential privacy is that it provides a strong, quantifiable, and composable privacy guarantee. The main idea of differential privacy is to add carefully calibrated noise to the results, which makes it difficult to determine with high certainty whether a specific individual's data was included in the results or not.¶
KT: I think we should distinguish between the randomizer and the DP guarantee. So I have attempted to use Client randomizer and Aggregator randomizer to describe the noise addition by those two, and Local DP and Aggregator DP to refer to the privacy guarantee. The distinction is important because Client randomizer + aggregate gives an aggregator DP guarantee.¶
There are two levels of privacy protection: local differential privacy (local DP) and aggregator differential privacy (aggregator DP).¶
OPEN ISSUE: or call it secure aggregator dp, or central dp.¶
In the local-DP setting, Clients apply noise to their own measurements. In this way, Clients have some control over protecting the privacy of their own data. Any measurement uploaded by a Client will have local DP, so the Client's privacy is protected even if none of the Aggregators is honest (although this protection may be weak). Furthermore, one can analyze the aggregator DP guarantee with privacy amplification by aggregation, assuming each Client has added the required amount of local DP noise and there are at least a minimum batch size number of Clients in the aggregation.¶
In the Aggregator randomization setting, an Aggregator applies noise to the aggregate. This approach relies on the server being secure and trustworthy. Aggregators built using the DAP protocol are ideal for this setting because DAP ensures no server can access any individual's data, only the aggregate.¶
If no local DP noise is added by Clients, the noise added to the aggregate alone provides the privacy guarantee for the aggregate.¶
JC: For now, we have been assuming either Client randomization or Aggregator randomization gives the target DP parameters. Theoretically one can use Aggregator randomization together with Client randomization to achieve DP. For example, if the DP guarantee can be achieved with Client randomization from a batch size number of Clients, and the batch size is not reached when a data collection task expires, each Aggregator can "compensate" for the remaining noise by applying the same Client randomizer for the missing Clients, i.e., the gap between the actual number of Clients and the target batch size.¶
There are primarily two models in the literature for defining two "neighboring batches": deletion (or removal) of one measurement, and replacement (or substitution) of one measurement with another [KM11]. In the DAP setting, the protocol leaks the number of measurements in each batch collected and the appropriate version of deletion-DP considers substitution by a fixed value (e.g. zero). In other words, two batches of measurements D1 and D2 are "neighboring" for deletion-DP if D2 can be obtained from D1 by replacing one measurement by a fixed reference value.¶
In some cases, a weaker notion of adjacency may be appropriate. For example, we may be interested in hiding single coordinates of the measurement, rather than the whole vector of measurements. In this case, neighboring datasets differ in one coordinate of one measurement.¶
TODO: Chris P to fill: user or report, given time¶
There are various types of DP guarantees and budgets that can be enforced. Many applications need to query the Client data multiple times, for example:¶
Federated machine learning applications require multiple aggregates to be computed over the same underlying data, but with different machine learning model parameters.¶
[MJTB_22] describes an interactive approach to building histograms over multiple iterations, and Section 4.3 describes a way to track the Client-side budget when the Client data is queried multiple times.¶
TODO: have citations for machine learning¶
It’s hard for Aggregator(s) to keep track of the privacy budget over time, because different Clients can participate in different data collection tasks, and only Clients know when their data is queried. Therefore, Clients must enforce the privacy budget.¶
There could be multiple ways to compose DP guarantees, based on different DP composition theorems. In the various example DP guarantees below, we describe the following:¶
EPSILON-DP, or (EPSILON, DELTA)-approximate DP
Pure EPSILON-DP was first proposed in [DMNS06], and a formal definition of (EPSILON, DELTA)-DP can be found in Definition 2.4 of [DR14].¶
The EPSILON parameter quantifies the "privacy loss" of observing the outcomes of querying two databases differing by one element. The smaller EPSILON is, the stronger the privacy guarantee is, that is, the outcomes of querying two adjacent databases are more or less the same. The DELTA parameter provides a small probability of the privacy loss exceeding EPSILON.¶
One can compose multiple (EPSILON, DELTA)-approximate DP guarantees, per Theorem 3.4 of [KOV15]. One can also compose the guarantees in another type of guarantee first, such as Rényi DP (Appendix B.5.1), and then convert the composed guarantee to an approximate DP guarantee.¶
(ALPHA, TAU)-Rényi DP
A formal definition of Rényi DP can be found in Definitions 3 and 4 of [Mir17].¶
The intuition behind Rényi-DP is to use the TAU parameter to measure the divergence of the probability distributions of querying two adjacent databases, given the Rényi order parameter ALPHA. The smaller the TAU parameter, the harder it is to distinguish the outputs from querying two adjacent databases, and thus the stronger the privacy guarantee is.¶
One can compose multiple Rényi DP guarantees based on Proposition 1 of [Mir17]. After composition, one can convert the (ALPHA, TAU)-Rényi DP guarantee to (EPSILON, DELTA)-approximate DP, per Proposition 12 of [CKS20].¶
A formal definition of zero Concentrated-DP can be found in Definition 1.1 of [BS16].¶
Zero Concentrated-DP uses different parameters from Rényi-DP, but uses a similar idea to measure the output distribution divergence of querying two adjacent databases.¶
One can compose multiple zCDP guarantees, per Lemma 1.7 of [BS16].¶
A differential privacy guarantee can only be achieved if the correct type of noise is applied to the given data type.¶
TODO: Junye to fill, mention DAP is expected to ensure the right pair of VDAF and DP mechanism¶
TODO: Chris P: we will mention Prio3SumVec because that's what we use to describe aggregator DP with amplification¶