Skip to content

Port divide-by-constant magic number optimizations from simple_preopt to ISLE mid-end #6049

@fitzgen

Description

@fitzgen

This is the old simple preopt stuff for some number divided/remaindered by a non-power-of-two constant:

  • pub fn magic_u32(d: u32) -> MU32 {
    debug_assert_ne!(d, 0);
    debug_assert_ne!(d, 1); // d==1 generates out of range shifts.
    let mut do_add: bool = false;
    let mut p: i32 = 31;
    let nc: u32 = 0xFFFFFFFFu32 - u32::wrapping_neg(d) % d;
    let mut q1: u32 = 0x80000000u32 / nc;
    let mut r1: u32 = 0x80000000u32 - q1 * nc;
    let mut q2: u32 = 0x7FFFFFFFu32 / d;
    let mut r2: u32 = 0x7FFFFFFFu32 - q2 * d;
    loop {
    p = p + 1;
    if r1 >= nc - r1 {
    q1 = u32::wrapping_add(u32::wrapping_mul(2, q1), 1);
    r1 = u32::wrapping_sub(u32::wrapping_mul(2, r1), nc);
    } else {
    q1 = u32::wrapping_mul(2, q1);
    r1 = 2 * r1;
    }
    if r2 + 1 >= d - r2 {
    if q2 >= 0x7FFFFFFFu32 {
    do_add = true;
    }
    q2 = 2 * q2 + 1;
    r2 = u32::wrapping_sub(u32::wrapping_add(u32::wrapping_mul(2, r2), 1), d);
    } else {
    if q2 >= 0x80000000u32 {
    do_add = true;
    }
    q2 = u32::wrapping_mul(2, q2);
    r2 = 2 * r2 + 1;
    }
    let delta: u32 = d - 1 - r2;
    if !(p < 64 && (q1 < delta || (q1 == delta && r1 == 0))) {
    break;
    }
    }
    MU32 {
    mul_by: q2 + 1,
    do_add,
    shift_by: p - 32,
    }
    }
    pub fn magic_u64(d: u64) -> MU64 {
    debug_assert_ne!(d, 0);
    debug_assert_ne!(d, 1); // d==1 generates out of range shifts.
    let mut do_add: bool = false;
    let mut p: i32 = 63;
    let nc: u64 = 0xFFFFFFFFFFFFFFFFu64 - u64::wrapping_neg(d) % d;
    let mut q1: u64 = 0x8000000000000000u64 / nc;
    let mut r1: u64 = 0x8000000000000000u64 - q1 * nc;
    let mut q2: u64 = 0x7FFFFFFFFFFFFFFFu64 / d;
    let mut r2: u64 = 0x7FFFFFFFFFFFFFFFu64 - q2 * d;
    loop {
    p = p + 1;
    if r1 >= nc - r1 {
    q1 = u64::wrapping_add(u64::wrapping_mul(2, q1), 1);
    r1 = u64::wrapping_sub(u64::wrapping_mul(2, r1), nc);
    } else {
    q1 = u64::wrapping_mul(2, q1);
    r1 = 2 * r1;
    }
    if r2 + 1 >= d - r2 {
    if q2 >= 0x7FFFFFFFFFFFFFFFu64 {
    do_add = true;
    }
    q2 = 2 * q2 + 1;
    r2 = u64::wrapping_sub(u64::wrapping_add(u64::wrapping_mul(2, r2), 1), d);
    } else {
    if q2 >= 0x8000000000000000u64 {
    do_add = true;
    }
    q2 = u64::wrapping_mul(2, q2);
    r2 = 2 * r2 + 1;
    }
    let delta: u64 = d - 1 - r2;
    if !(p < 128 && (q1 < delta || (q1 == delta && r1 == 0))) {
    break;
    }
    }
    MU64 {
    mul_by: q2 + 1,
    do_add,
    shift_by: p - 64,
    }
    }
    pub fn magic_s32(d: i32) -> MS32 {
    debug_assert_ne!(d, -1);
    debug_assert_ne!(d, 0);
    debug_assert_ne!(d, 1);
    let two31: u32 = 0x80000000u32;
    let mut p: i32 = 31;
    let ad: u32 = i32::wrapping_abs(d) as u32;
    let t: u32 = two31 + ((d as u32) >> 31);
    let anc: u32 = u32::wrapping_sub(t - 1, t % ad);
    let mut q1: u32 = two31 / anc;
    let mut r1: u32 = two31 - q1 * anc;
    let mut q2: u32 = two31 / ad;
    let mut r2: u32 = two31 - q2 * ad;
    loop {
    p = p + 1;
    q1 = 2 * q1;
    r1 = 2 * r1;
    if r1 >= anc {
    q1 = q1 + 1;
    r1 = r1 - anc;
    }
    q2 = 2 * q2;
    r2 = 2 * r2;
    if r2 >= ad {
    q2 = q2 + 1;
    r2 = r2 - ad;
    }
    let delta: u32 = ad - r2;
    if !(q1 < delta || (q1 == delta && r1 == 0)) {
    break;
    }
    }
    MS32 {
    mul_by: (if d < 0 {
    u32::wrapping_neg(q2 + 1)
    } else {
    q2 + 1
    }) as i32,
    shift_by: p - 32,
    }
    }
    pub fn magic_s64(d: i64) -> MS64 {
    debug_assert_ne!(d, -1);
    debug_assert_ne!(d, 0);
    debug_assert_ne!(d, 1);
    let two63: u64 = 0x8000000000000000u64;
    let mut p: i32 = 63;
    let ad: u64 = i64::wrapping_abs(d) as u64;
    let t: u64 = two63 + ((d as u64) >> 63);
    let anc: u64 = u64::wrapping_sub(t - 1, t % ad);
    let mut q1: u64 = two63 / anc;
    let mut r1: u64 = two63 - q1 * anc;
    let mut q2: u64 = two63 / ad;
    let mut r2: u64 = two63 - q2 * ad;
    loop {
    p = p + 1;
    q1 = 2 * q1;
    r1 = 2 * r1;
    if r1 >= anc {
    q1 = q1 + 1;
    r1 = r1 - anc;
    }
    q2 = 2 * q2;
    r2 = 2 * r2;
    if r2 >= ad {
    q2 = q2 + 1;
    r2 = r2 - ad;
    }
    let delta: u64 = ad - r2;
    if !(q1 < delta || (q1 == delta && r1 == 0)) {
    break;
    }
    }
    MS64 {
    mul_by: (if d < 0 {
    u64::wrapping_neg(q2 + 1)
    } else {
    q2 + 1
    }) as i64,
    shift_by: p - 64,
    }
    }

  • // U32 div, rem by non-power-of-2
    DivRemByConstInfo::DivU32(n1, d) | DivRemByConstInfo::RemU32(n1, d) => {
    debug_assert!(d >= 3);
    let MU32 {
    mul_by,
    do_add,
    shift_by,
    } = magic_u32(d);
    let qf; // final quotient
    let q0 = pos.ins().iconst(I32, mul_by as i64);
    let q1 = pos.ins().umulhi(n1, q0);
    if do_add {
    debug_assert!(shift_by >= 1 && shift_by <= 32);
    let t1 = pos.ins().isub(n1, q1);
    let t2 = pos.ins().ushr_imm(t1, 1);
    let t3 = pos.ins().iadd(t2, q1);
    // I never found any case where shift_by == 1 here.
    // So there's no attempt to fold out a zero shift.
    debug_assert_ne!(shift_by, 1);
    qf = pos.ins().ushr_imm(t3, (shift_by - 1) as i64);
    } else {
    debug_assert!(shift_by >= 0 && shift_by <= 31);
    // Whereas there are known cases here for shift_by == 0.
    if shift_by > 0 {
    qf = pos.ins().ushr_imm(q1, shift_by as i64);
    } else {
    qf = q1;
    }
    }
    // Now qf holds the final quotient. If necessary calculate the
    // remainder instead.
    if is_rem {
    let tt = pos.ins().imul_imm(qf, d as i64);
    pos.func.dfg.replace(inst).isub(n1, tt);
    } else {
    replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
    }
    }

The ISLE + e-graphs mid-end work replaced simple_preopt, but never ported these magic constants over. We should port them over.

HOWEVER, right now we can't replace potentially-trapping instructions in ISLE, and we need that ability to do this.

Metadata

Metadata

Assignees

No one assigned

    Labels

    cranelift:mid-endclif-to-clif related passes, legalizations, etc...isleRelated to the ISLE domain-specific language

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions