% Aanjaneya and Nagarakatte, CGO 2023: fast polynomial evaluation (coefficient
% adaptation, Estrin's procedure, fused multiply-add) integrated into RLibm's
% polynomial generation for correctly rounded elementary functions.
% Text outside an entry is ignored by BibTeX, so this note is safe here.
@inproceedings{Aanjaneya:2023:FPERLIBM,
  author    = {Aanjaneya, Mridul and Nagarakatte, Santosh},
  title     = {Fast Polynomial Evaluation for Correctly Rounded Elementary Functions Using the {RLIBM} Approach},
  booktitle = {Proceedings of the 21st {ACM/IEEE} International Symposium on Code Generation and Optimization},
  series    = {CGO 2023},
  location  = {Montr{\'e}al, QC, Canada},
  pages     = {95--107},
  numpages  = {13},
  year      = {2023},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9798400701016},
  doi       = {10.1145/3579990.3580022},
  url       = {https://doi.org/10.1145/3579990.3580022},
  keywords  = {fused-multiply-add, coefficient adaptation, RLIBM, correctly rounded, Estrin's procedure, Horner's method},
  abstract  = {This paper proposes fast polynomial evaluation methods for correctly rounded elementary functions generated using our RLibm approach. The resulting functions produce correct results for all inputs with multiple representations and rounding modes. Given an oracle, the RLibm approach approximates the correctly rounded result rather than the real value of an elementary function. A key observation is that there is an interval of real values around the correctly rounded result such that any real value in it rounds to the correct result. This interval is the maximum freedom available to RLibm's polynomial generation procedure. Subsequently, the problem of generating correctly rounded elementary functions using these intervals can be structured as a linear programming problem. Our prior work on the RLibm approach uses Horner's method for polynomial evaluation. This paper explores polynomial evaluation techniques such as Knuth's coefficient adaptation procedure, parallel execution of operations using Estrin's procedure, and the use of fused multiply-add operations in the context of the RLibm approach. If we take the polynomial generated by the RLibm approach and subsequently perform polynomial evaluation optimizations, it results in incorrect results due to rounding errors during polynomial evaluation. Hence, we propose to integrate the fast polynomial evaluation procedure in the RLibm's polynomial generation process. Our new polynomial evaluation procedure that combines parallel execution with fused multiply-add operations outperforms the Horner's method used by RLibm's correctly rounded functions. We show the resulting polynomials for 32-bit float are not only correct but also faster than prior functions in RLibm by 24\%.},
}