/* emc.c -- Integer factorization using the Elliptic Curve Method See http://www.loria.fr/~zimmerma/records/ecmnet.html Copyright (C) 1998 Paul Zimmermann, INRIA Lorraine, zimmerma@loria.fr See http://www.loria.fr/~zimmerma/records/ecmnet.html This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. Changes with respect to 2a: - use base-2 division for factors of 2^k+/-1 - added division using pre-inversion (macro PREINVERT): no saving - now prints a warning for probable prime input - now checks if factors found are composite or not - now checks for prime powers Changes with respect to 2b: - saved a factor of two in step 2 initialization, and a factor of two in memory needed by step 2 - changed B2 and m in step 2 to be double's --> no overflow any more, even on 32-bit machines. - fixed bug for multiple-line input (thanks to Torbjorn and P. Leyland) Changes with respect to 2c: - added LARGE macro for large input, like Fermat numbers, to disable printing of input number, values of A and x - now does primality and perfect-power tests only with CHECK=1 (default 0) Changes with respect to 2d: - no normalization by default in step 1 - no gcd after each prime in step 1 (only one at the end of step 1), unless GROUPORDER is defined (useful for computing group order) - no gcd after each giant-step in phase 2 (only one at the end), unless GROUPORDER is defined - in step 2, if NOGCD if defined, does no gcd (3 times more multiplications) ==> useful for Fermat numbers - included changes from Alex Stuebinger for ANSIONLY compilers (08/04/98) - LARGE=1 by default (does not print A=..., x=...) - removed compare2 (always use base-2 division for divisors of 2^n+/-1) Changes with respect to 2f: - Change several variables from type double to just int. - Change sieve table (pr) to have values 0 and 1, not '0' and '1'. - Change pr to unsigned char (faster on alpha, sparc, etc). - Avoid repeated %q in step2 (as well as step1). - Sieve only on odd i's in pr[i] (step 1) and even i's in step 2 - Avoid expensive %q computations in step 2 (now only at initialization) Changes with respect to 2g: - combine k prime powers in step 1 and replace k muls by one inverse To do: - use Peter's trick to replace k gcdexts by one gcdext and (k-1) muls (both in step 1 and 2) Changes with respect to 2h: - removed PREINVERT stuff - new add3 procedure: removed all mpz_set in multiply - implemented Peter Montgomery's PRAC algorithm [4] Changes with respect to 2i: - implemented Peter Montgomery's MODMULN algorithm Changes with respect to 2ia: - completely rewritten step 2 - removed NOGCD stuff Changes with respect to version 3a: - added GNU General Public License - removed REDC stuff Changes in version 4 (April 1999): - uses fast multipoint polynomial evaluation in step 2, together with O(n^1.59) Karatsuba multiplication - uses k passes in step 2: cost [30+12*(k-1)]*M(n/2) Changes in version 4a: - cost of recip reduced to 9/2*M(n/2) instead of 6*M(n/2) i.e. cost of step 2 is now [57/2+12*(k-1)]*M(n/2) - improved help Changes in version 4b (November 1999): - replaced use of reciprocals in polyeval by Burnickel/Ziegler algorithm i.e. cost of step 2 is now (4k+2)*K(n) Changes in version 4c (December 1999): - in step 2, call mpz_mod only *after* Karatsuba (i.e. 2*k calls to mpz_mod instead of k^1.59) This version uses Montgomery's form (8) from [2] which avoids gcds: b*y^2*z = x^3 + a*x^2*z + x*z^2 References: [1] "Speeding the Pollard and Elliptic Curve Methods of Factorization", by Peter Montgomery, Math. of Comp. 48 (177), pages 243-264, January 1987. [2] "Factorization of the tenth and eleventh Fermat numbers", by Richard Brent, ftp://nimbus.anu.edu.au/pub/Brent/rpb161tr.dvi.gz [3] Torbjorn Granlund, Peter L. Montgomery: Division by Invariant Integers using Multiplication. PLDI 1994: 61-72, SIGPLAN Notices 29(6) (June 1994) [4] "Evaluating recurrences of form X_{m+n} = f(X_m,X_n,X_{m-n}) via Lucas chains", Peter Montgomery, ftp.cwi.nl:/pub/pmontgom/Lucas.ps.gz Examples (log and timing lines omitted): % echo 137703491 | ecm 100 6 ********** Factor found in step 1: 17389 (if compiled with -DGROUPORDER) % echo 137703491 | ecm 100 13 ********** Factor found in step 2: 7919 (bug found by T. Granlund in 1st version of ecm2f) % echo 17061648125571273329563156588435816942778260706938821014533 | ecm 174000 585928442 ********** Factor found in step 2: 4562371492227327125110177 From [2], page 15 (factorization of 55^126+1): % echo 5394204444759808120647321820789847518754252780933425517607611172590240019087317088600360602042567541009369753816111824690753627535877960715703346991252857 | ecm 345551 805816989 ********** Factor found in step 1: 25233450176615986500234063824208915571213 % ecm 314263 14152267 4677853 < F10.cofactor Input number is 607820568181834328745927047401406785398975700821911559763928675076909152806525747797078707978021962487854849079350770968904705424125269800765765006449689562590686195386366153585734177565092347016126765195631310982002631912943551551593959032889971392442015624176361633631364310142874363629569 ********** Factor found in step 2: 4659775785220018543264560743076778192897 # first Cunningham factor found by GMP-ECM (06 Dec 1997) % echo 449590253344339769860648131841615148645295989319968106906219761704350259884936939123964073775456979170209297434164627098624602597663490109944575251386017 | ecm 1000000 63844855 ********** Factor found in step 2: 241421225374647262615077397 # p48 found by Richard Brent on October 9, 1997 % echo 3923385745693995079670229419275984584311007321932374190635656246740175165573932140787529348954892963218868359081838772941945556717 | ecm 141667 876329474 150814537 ********** Factor found in step 2: 662926550178509475639682769961460088456141816377 # p45 found by Richard Brent on October 24, 1997 % echo 89101594496537524661600025466303491594098940711325290746374420963129505171895306244425914080753573576861992127359576789001 | ecm 325001 877655087 1032299 ********** Factor found in step 2: 122213491239590733375594767461662771175707001 */ #include #include #include #ifndef IRIX /* conflict with gcc math.h */ #include #endif #include #include #include #ifndef ANSIONLY #include #include #endif #define MODMULN #include "gmp.h" #ifdef MODMULN #include "gmp-impl.h" #include "longlong.h" mp_limb_t Nprim; #endif #ifndef max #define max(a,b) (((a)>(b)) ? (a) : (b)) #define min(a,b) (((a)<(b)) ? (a) : (b)) #endif #ifdef NORANDOM long int random(void) { return abs(rand() ^ (rand()<<16)); } #endif /* ANSI Prototypes */ extern void printout(__mpz_struct *,__mpz_struct *); extern int ecm(__mpz_struct *,__mpz_struct *,__mpz_struct *,double,int,int); extern int step1(__mpz_struct *); extern void initprimes(double,int ); extern void prac(unsigned int); extern void add3(__mpz_struct *,__mpz_struct *,__mpz_struct *,__mpz_struct *, __mpz_struct *,__mpz_struct *,__mpz_struct *,__mpz_struct *); extern void duplicate(__mpz_struct *,__mpz_struct *,__mpz_struct *,__mpz_struct *); extern int step2(__mpz_struct *,__mpz_struct *,unsigned int,double, __mpz_struct *,__mpz_struct *,int,unsigned int); extern int cputime(void); extern int isbase2(__mpz_struct *,double); extern void mod2plus(__mpz_struct *,__mpz_struct *,__mpz_struct *); extern void mod2minus(__mpz_struct *,__mpz_struct *,__mpz_struct *); extern void mpz_mod_n(__mpz_struct *,__mpz_struct *,__mpz_struct *); void (*mod)(mpz_t a,mpz_t b,mpz_t n)=NULL; extern int multiplyW(__mpz_struct *,__mpz_struct *,__mpz_struct *, __mpz_struct *,__mpz_struct *,unsigned int,__mpz_struct *, __mpz_struct *,__mpz_struct *,__mpz_struct *); extern int multiplyW2(__mpz_struct *,__mpz_struct *,__mpz_struct *, __mpz_struct *,__mpz_struct *,__mpz_struct *,__mpz_struct *, __mpz_struct *,__mpz_struct *,__mpz_struct *); extern int subW(__mpz_struct *,__mpz_struct *,__mpz_struct *,__mpz_struct *, __mpz_struct *,__mpz_struct *,__mpz_struct *,__mpz_struct *, __mpz_struct *,__mpz_struct *); extern int addWn(__mpz_struct *,mpz_t *,mpz_t *, __mpz_struct *,mpz_t *,mpz_t *,int); extern int duplicateW(__mpz_struct *,__mpz_struct *,__mpz_struct *, __mpz_struct *,__mpz_struct *,__mpz_struct *, __mpz_struct *,__mpz_struct *,__mpz_struct *); extern int addW(__mpz_struct *,__mpz_struct *,__mpz_struct *,__mpz_struct *, __mpz_struct *,__mpz_struct *,__mpz_struct *,__mpz_struct *, __mpz_struct *,__mpz_struct *); extern void polyeval(mpz_t *,mpz_t **,mpz_t *,unsigned int); extern void buildF(mpz_t **,mpz_t *,unsigned int); extern double default_B2(double); extern void polymul(mpz_t*,mpz_t*,mpz_t*,unsigned int,mpz_t*); extern void karatsuba(mpz_t*,mpz_t*,mpz_t*,unsigned int,mpz_t*); extern void karatsuba1(mpz_t*,mpz_t*,mpz_t*,unsigned int,mpz_t*); extern void buildG(mpz_t*,mpz_t*,unsigned int); extern void polymulmod(mpz_t*,mpz_t*,mpz_t*,int); extern int initpoly(int,mpz_t*,mpz_t*,mpz_t,mpz_t,mpz_t,mpz_t,int,int); extern void set_dickson(mpz_t*, int); extern void RecursiveDivision(mpz_t *,mpz_t *,mpz_t *,unsigned int,mpz_t *); extern void DivThreeLongHalvesByTwo(mpz_t *,mpz_t *,mpz_t *, unsigned int, mpz_t *); #ifndef ANSIONLY #ifndef ULTRIX /* from Paul Leyland: otherwise produces a syntax error */ extern void srand48(); extern pid_t getpid(); struct tms ti; #endif #endif /* global variables */ unsigned int bb,*prime,nbprimes,mul,gcdexts,lgn,verbose=0,base2=1,go=0; mp_size_t sizen; int B1,ispower2,use_dickson=0; long lrand48(); unsigned char *pr; mpz_t a,b,n,initial_n,u,v,w,x,z,x1,z1,x2,z2,one,y,invn,*Rmod, x3,z3,x4,z4,*dick=NULL; mp_limb_t **Rmodd; unsigned int nb_digits(n) mpz_t n; { unsigned int size; char *str; str = mpz_get_str(NULL,10,n); size = strlen(str); free(str); return size; } void clear_all() { mpz_clear(a); mpz_clear(b); mpz_clear(n); mpz_clear(u); mpz_clear(v); mpz_clear(w); mpz_clear(x); mpz_clear(x1); mpz_clear(z1); mpz_clear(x2); mpz_clear(z2); mpz_clear(z); mpz_clear(one); mpz_clear(y); mpz_clear(invn); mpz_clear(x3); mpz_clear(z3); mpz_clear(x4); mpz_clear(z4); mpz_clear(initial_n); } int main(argc,argv) int argc; char *argv[]; { int r,e=0,iter=0; char c='0'; mpz_t p,s; double B2; while (argc>1 && argv[1][0]=='-') { if (strcmp(argv[1],"-v")==0) { verbose=1; argv += 1; argc -= 1; } else if (strcmp(argv[1],"-go")==0) { go=1; argv += 1; argc -= 1; } else if (strcmp(argv[1],"-nobase2")==0) { base2=0; argv += 1; argc -= 1; } else if (strcmp(argv[1],"-dickson")==0) { use_dickson=1; argv += 1; argc -= 1; } else if (argc>2 && strcmp(argv[1],"-e")==0) { e = atoi(argv[2]); argv += 2; argc -= 2; } else if (argc>2 && strcmp(argv[1],"-k")==0) { iter = atoi(argv[2]); argv += 2; argc -= 2; } else { printf("unknown option: %s\n",argv[1]); exit(1); } } if (argc<2) { printf("Usage: ecm [-v] [-go] [-nobase2] [-dickson] [-e n] [-k n] B1 [sigma [B2]] < file\n"); printf(" ecm [-v] [-e n] [-k n] B1 A B2 x1 < file\n"); printf("\n"); printf("Parameters:\n"); printf(" B1 step 1 limit\n"); printf(" sigma elliptic curve seed (0 gives random curve)\n"); printf(" B2 step 2 limit (default is >= 100*B1)\n"); printf(" A, x1 elliptic curve parameter and starting point\n"); printf(" file file of numbers to factor, one per line (or standard input)\n"); printf("\n"); printf("Options:\n"); printf(" -v verbose\n"); printf(" -go determines largest group order factor (slow)\n"); printf(" -nobase2 disable special division for factors of 2^n+/-1\n"); printf(" -dickson use Dickson's n-th polynomial instead of x^n\n"); printf(" -e n impose polynomial x^n for Brent-Suyama's extension\n"); printf(" -k n perform at most n passes in step 2\n"); exit(1); } printf("GMP-ECM 4c, by P. Zimmermann (Inria), 16 Dec 1999, with contributions from\n"); printf("T. Granlund, P. Leyland, C. Curry, A. Stuebinger, G. Woltman, JC. Meyrignac,\n"); printf("A. Yamasaki, and the invaluable help from P.L. Montgomery.\n"); mpz_init(a); mpz_init(b); mpz_init(n); mpz_init(u); mpz_init(v); mpz_init(w); mpz_init(x); mpz_init(x1); mpz_init(z1); mpz_init(x2); mpz_init(z2); mpz_init(z); mpz_init(s); mpz_init(x3); mpz_init(z3); mpz_init(x4); mpz_init(z4); mpz_init(initial_n); mpz_init_set_ui(one,1); mpz_init(p); mpz_init(y); mpz_init(invn); B1 = atoi(argv[1]); if (B1<0) { printf("Error: negative B1\n"); exit(1); } /* initialize table of primes */ bb=0; initprimes((double)B1,0); /* s = (argc>=3) ? atoi(argv[2]) : 0; */ if (argc>=3) mpz_set_str(s,argv[2],10); else mpz_set_ui(s,0); if (argc>=4) B2 = atof(argv[3]); else /* Default B2 */ B2 = default_B2((double) B1); if (argc>=5) { mpz_set_str(a,argv[2],10); mpz_set_str(x,argv[4],10); } else mpz_set_ui(x,0); #ifdef ANSIONLY if (mpz_cmp_ui(s,0)==0) { time_t timer; struct tm tp; time(&timer); tp = *localtime(&timer); srand(tp.tm_hour * 3600 + tp.tm_min * 60 + tp.tm_sec); } #else if (mpz_cmp_ui(s,0)==0) { #ifdef __DJGPP__ srandom(time(NULL) + getpid()); /* thanks to Conrad Curry */ #else struct timeval tp; gettimeofday(&tp, NULL); srand48(65536 * tp.tv_sec + tp.tv_usec + getpid()); #endif } #endif while (!feof(stdin)) { mpz_inp_str(n,stdin,0); #ifndef VERYLARGE { char *str; if (mpz_sizeinbase (n, 10) <= 1000) str = mpz_get_str(NULL,10,n); else str = "too large for my taste\n"; printf("Input number is %s", str); if (mpz_size (n) <= 100) { printf(" (%u digits)\n",strlen(str)); fflush(stdout); free(str); } } #endif if ((r=ecm(p,n,s,B2,e,iter))) { if (mpz_cmp(p,n)) { if (mpz_probab_prime_p(p,25)) printf("Found probable prime factor"); else printf("Found COMPOSITE factor"); printf(" of %u digits: ",nb_digits(p)); mpz_out_str(stdout,10,p); putchar('\n'); if (mpz_probab_prime_p(p,25) && nb_digits(p)>=48) { printf("Report your potential champion to Richard Brent \n"); printf("(see ftp://ftp.comlab.ox.ac.uk/pub/Documents/techpapers/Richard.Brent/champs.txt)\n"); } mpz_divexact(n,n,p); if (mpz_probab_prime_p(n,25)) printf("Probable prime"); else printf("Composite"); printf(" cofactor "); mpz_out_str(stdout,10,n); printf(" has %u digits",nb_digits(n)); } else printf("Found input number N"); printf("\n"); fflush(stdout); } while (!feof(stdin) && !isdigit(c=getchar())); /* exit with 0 iff a factor found for the last input. Allows to do: while ecm 1000000 0 && 2*(iter-1)*K*(double)D>=B2) iter--; e = (double) best_e(K); c2 = 6.0*e*((double)D/3.0) /* computation of nQx */ + (4.0*iter+2.0)*pow((double)K, log(3.0)/log(2.0)) /* poly. stuff */ + 6.0*e*iter*(double)K; /* computation of (2D)^m*Q */ if (verbose) printf("estimated muls for B2=%u*%u*%u: %1.0f\n", (unsigned) iter, K-1, D, c2); if (c2 random) returns 0 iff no factor found iter=0 means choice left to the program, otherwise imposed */ int ecm(p,n,s,B2,e,iter) mpz_t p,n,s; double B2; int e,iter; { unsigned int st,res,D,K,phi; mul=0; gcdexts=0; sizen=mpz_size(n); mpz_set(initial_n, n); mpz_set_ui(p,6); mpz_gcd(p,n,p); if (mpz_cmp(p,one)) return(1); /* now gcd(n,6)=1 */ /* slower in step 1 than usual division for 2,568+ c120 (1.43) but faster for 2,671- c145 (1.40) */ if ((base2 && (ispower2 = isbase2(n,1.4)))) { printf("recognized factor of 2^%d",(ispower2>0) ? ispower2 : -ispower2); if (ispower2>0) { mod=mod2plus; printf("+"); } else { mod=mod2minus; printf("-"); } printf("1, using special base-2 division\n"); fflush(stdout); } else #ifdef MODMULN { mod=mpz_mod_n; mpz_set_ui(v,1); mpz_mul_2exp(v,v,mp_bits_per_limb); /* v=2^k */ mpz_gcdext(z,u,NULL,n,v); /* z should be 1 since n is odd and v a power of 2 */ if (mpz_cmp_ui(z,1)!=0) { fprintf(stderr,"gcd(n,R) is not 1\n"); exit(1);} mpz_neg(u,u); mpz_mod(u,u,v); Nprim=PTR(u)[0]; /* Nprim * n = -1 mod v=2^k, the word base */ } #else mod=(void*)mpz_mod; #endif if (iter==0) iter=12; /* max number of iterations */ D=bestD(B2/iter,&K,&phi); if (e==0) e=best_e(K); if (e==1) use_dickson=0; /* Dickson(1) = x^1 */ if (use_dickson && dick==NULL) { dick=(mpz_t*) malloc((e/2+1)*sizeof(mpz_t)); set_dickson(dick, e); } /* adjust iter if too large */ if (B2>0.0) while (2*(iter-1)*(K-1)*(double)D>=B2) iter--; if (2*iter*(K-1)*(double)D0) { mpz_mul(c,c,invR); mpz_mod(c,c,n); } mpz_clear(invR); mpz_clear(g); mpz_clear(R); } #endif /* returns 0 iff no factor found, otherwise returns factor in p */ int step1(p) mpz_t p; { unsigned int l,i,j,q,imax,lmax,pp; #ifdef MODMULN if (ispower2==0) { /* multiply (x,z) by R^sizen */ mod_mul2exp(x,n,sizen); mod_mul2exp(z,n,sizen); mod_mul2exp(b,n,sizen); /* for duplicate */ _mpz_realloc(x,2*sizen+1); _mpz_realloc(z,2*sizen+1); _mpz_realloc(x1,2*sizen+1); _mpz_realloc(z1,2*sizen+1); _mpz_realloc(x2,2*sizen+1); _mpz_realloc(z2,2*sizen+1); _mpz_realloc(x3,2*sizen+1); _mpz_realloc(z3,2*sizen+1); _mpz_realloc(x4,2*sizen+1); _mpz_realloc(z4,2*sizen+1); _mpz_realloc(u,2*sizen+1); _mpz_realloc(v,2*sizen+1); _mpz_realloc(w,2*sizen+1); } #endif /* treat the cases p=2 and p=3 separately */ for (q=2;q<=B1;q*=2) duplicate(x,z,x,z); for (q=3;q<=B1;q*=3) { duplicate(x1,z1,x,z); add3(x,z,x,z,x1,z1,x,z); } lmax = B1/bb; for (l=0;l<=lmax;l++) { /* check range l*bb <= p < (l+1)*bb */ if (l) { /* sieve primes, pr[i] corresponds to l*bb+i */ for (i=0;ib) b=i; if (b%2) b++; /* ensures b is even for Step 1 */ if (b<=(int)bb) return; /* already done */ if (pr != NULL) free(pr); pr = (unsigned char*) malloc(b+1); /* compute primes up to b */ for (i=2;i<=b;i++) pr[i]=1; j=2; do { for (i=j*j;i<=b;i+=j) pr[i]=0; while (pr[++j]==0); } while (j*j<=b); for (nbprimes=0,i=2;i<=b;i++) if (pr[i]!=0) nbprimes++; if (prime != NULL) free(prime); prime = (unsigned int*) malloc((nbprimes+1)*sizeof(int)); for (j=0,i=2;i<=b;i++) if (pr[i]!=0) prime[++j]=i; bb=b; } #define ADD 6 /* number of multiplications in an addition */ #define DUP 5 /* number of multiplications in a duplicate */ #define START(d,v) ((d)/1.6180339887498948482-128.0+(v)) /* returns the number of modular multiplications */ unsigned int lucas_cost(n, v) unsigned int n; double v; { unsigned int c,d,e,r; d=n; r=(unsigned int)((double)d/v+0.5); if (r>=n) return(ADD*n); d=n-r; e=2*r-n; c=DUP+ADD; /* initial duplicate and final addition */ while (d!=e) { if (d2. */ void prac(n) unsigned int n; { unsigned int d,e,r,i=0; __mpz_struct *xA,*zA,*xB,*zB,*xC,*zC,*xT,*zT,*xT2,*zT2,*t; static double v[10] = {1.61803398875,1.72360679775,1.618347119656,1.617914406529,1.612429949509, 1.632839806089,1.620181980807,1.580178728295,1.617214616534,1.38196601125}; /* chooses the best value of v */ for (d=0,r=ADD*n;d1) { if (n%2!=0) { printf("Error: not a power of two\n"); exit(1); } n/=2; k++; } return k; } void printpol(f,k,flag) mpz_t *f; unsigned int k,flag; { int i; for (i=0;i0 && mpz_cmp_ui(f[i],0)>=0) printf("+"); mpz_out_str(stdout,10,f[i]); if (i>0) { printf("*x"); if (i>1) printf("^%d",i); } } if (flag) printf("+x^%d",k); printf("\n"); } /* Step 2: improved standard continuation, cf [2] p. 7-8. - p: variable to put factor found - n: number to factor - B1,B2: bounds for step 1 and 2 - x: x-coordinate of Q at the end of step 1 (z normalized to 1) - a: parameter from the curve b*y^2 = x^3 + a*x^2 + x used in step 1 Returns 0 iff no factor found, otherwise puts factor in p. 1. Computation of (6i+1)^e*Q: (D/3)*(6e) multiplications 2. buildF(): 3^lg(K) multiplications 4. Computation of (2mD)^e*Q: iter*K*(6e) multiplications 5. buildG(): iter*3^lg(K) multiplications 6. reducing G*H mod F: 3*(iter-1)*3^lg(K) multiplications 7. polyeval(): 4*3^lg(K) multiplications */ int step2(p,n,B1,B2,x,a,e,iter) mpz_t p,n,x,a; unsigned int B1,iter; double B2; int e; { mpz_t *T=NULL,**F=NULL,*nQx,*G,*lx,*ly,g,y,*tx,*ty; double m; int i,j,st,D,K,phi,residue,k; unsigned int res=0,nbit,extra,doit; unsigned long *Res=NULL, lgK; st=cputime(); if (verbose || go) { printf("x="); mpz_out_str(stdout,10,x); printf("\n"); fflush(stdout); } /* faster for 2,951- c158 (1.82) but slower for 2,749- c123 (1.85) */ if ((base2 && (ispower2 = isbase2(n,1.8)))) { if (ispower2>0) mod=mod2plus; else mod=mod2minus; } else mod=(void*)mpz_mod; /* can't use modmuln here */ /* determines g,y such that g*y^2 = x^3 + a*x^2 + x */ mpz_init(y); mpz_set_ui(y,1); mpz_init(g); mpz_add(g,x,a); mpz_mul(g,g,x); mpz_add_ui(g,g,1); mpz_mul(g,g,x); mod(g,g,n); /* change of coordinates x=g*X-a/3, y=g*Y to return to Weierstrass form */ mpz_mul_ui(u,g,3); mpz_mul(u,u,g); mod(u,u,n); mpz_gcdext(p,v,NULL,u,initial_n); if (mpz_cmp_ui(p,1)!=0) return(1); /* v=1/(3g^2)*/ mpz_mul_ui(x,x,3); mpz_add(x,x,a); mpz_mul(x,x,g); mpz_mul(x,x,v); mod(x,x,n); /* x = (x+a/3)/g = g*(3x+a)/(3g^2) */ mpz_mul_ui(y,y,3); mpz_mul(y,y,g); mpz_mul(y,y,v); mod(y,y,n); mpz_mul(a,a,a); mpz_sub_ui(a,a,3); mpz_neg(a,a); mpz_mul(a,a,v); mod(a,a,n); D=bestD(B2/iter,&K,&phi); if (phi>=K) { printf("error: phi>=K\n"); exit(1); } initprimes(B2,2*D); /* with Q the point obtained by Step 1, we compute (6d+1)^e*Q for 0<=d<=D/3, (6i+1)^e*Q is stored in nQ[i], and 2DQ is stored in nQ[D/3+1] George Woltman noticed we don't need 2*d*Q for D/2 < d < D */ nQx = (mpz_t*) malloc(K*sizeof(mpz_t)); for (i=0;i=K) { printf("error: residue>=K\n"); exit(1); } mpz_set_ui(nQx[K-1], 0); lgK = lg(K); F = (mpz_t**) malloc((lgK+1)*sizeof(mpz_t*)); F[0] = nQx; T = (mpz_t*) malloc(5*K*sizeof(mpz_t)); /* for Karatsuba */ for (i=0;i<5*K;i++) mpz_init(T[i]); if (!go) buildF(F,T,K); /* now F[i] contains polynomials of degree 2^i */ /* Q <- (2D)^e*Q */ if ((res=initpoly(e,lx,ly,x,y,p,a,2*D,2*D))) goto youpi2; /* now (lx[0],ly[0]) is (2D)^e*Q */ for (nbit=0,doit=0,m=4.0*D;nbit(double)B1); } if (verbose) printf("computation of (mD)^%u Q for m=%1.0f to %1.0f took %dms\n", e, m/D, m/D+2.0*K, cputime()-st); if (go) { printf("checking range %1.0f..%1.0f\n",m-(K-1)*2.0*(double)D, m); /* tries to find a factor between nQx[0..K-2] and G[0..K-2] */ for (i=0;i1) { if (nbit==0) { /* copies into T */ for (k=0;k1) for (k=0;k int cputime () { return (int) ((double) clock () * 1000 / CLOCKS_PER_SEC); } #else #include #include int cputime () { struct rusage rus; getrusage (0, &rus); return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; } #endif /* returns +/-k if n is a factor of N=2^k+/-1 with N<=n^threshold, 0 otherwise */ int isbase2(n, threshold) mpz_t n; double threshold; { unsigned int k,lo; int res=0; mpz_t u,w; mpz_init(u); mpz_init(w); lo=mpz_sizeinbase(n,2)-1; mpz_set_ui(u,1); mpz_mul_2exp(u,u,2*lo); mpz_mod(w,u,n); /* 2^(2lo) mod n = +/-2^(2lo-k) if m*n = 2^k+/-1 */ k = mpz_sizeinbase(w,2)-1; /* try w = 2^k */ mpz_set_ui(u,1); mpz_mul_2exp(u,u,k); if (mpz_cmp(w,u)==0) res=k-2*lo; else { /* try w = -2^k */ mpz_neg(w,w); mpz_mod(w,w,n); k = mpz_sizeinbase(w,2)-1; mpz_set_ui(u,1); mpz_mul_2exp(u,u,k); if (mpz_cmp(w,u)==0) res=2*lo-k; } mpz_clear(u); mpz_clear(w); if (abs(res)>(int)(threshold*lo)) res=0; return(res); } void mod2plus(a,b,n) mpz_t a,b,n; /* N = 2^ispower2 + 1 */ { /* 2^k = -1 */ mpz_tdiv_r_2exp(y,b,ispower2); mpz_tdiv_q_2exp(a,b,ispower2); mpz_sub(a,y,a); mpz_mod(a,a,n); } void mod2minus(a,b,n) mpz_t a,b,n; /* N = 2^k - 1, ispower2<0 */ { /* 2^k = 1 */ mpz_tdiv_r_2exp(y,b,-ispower2); mpz_tdiv_q_2exp(a,b,-ispower2); mpz_add(a,y,a); mpz_mod(a,a,n); } #ifdef MODMULN static void mpn_incr (mp_ptr p, mp_limb_t incr) { mp_limb_t x; x = *p + incr; *p++ = x; if (x >= incr) return; while (++(*(p++)) == 0) ; } /* Computes c/R^nn mod n, where n are nn limbs and c has space for size(c)+1 limbs. n must be odd. */ void mpz_mod_n (c, a, n) mpz_t c,a,n; { mp_ptr cp=PTR(c), np=PTR(n); mp_limb_t cy; mp_limb_t q; size_t j,nn=sizen; for (j=ABS(SIZ(c));j<=2*nn;j++) cp[j] = 0; for (j = 0; j < nn; j++) { q = cp[0] * Nprim; cy = mpn_addmul_1 (cp, np, nn, q); mpn_incr (cp + nn, cy); cp++; } cp -= nn; if (cp[2*nn]) { cy = cp[2*nn] - mpn_sub_n (cp, cp + nn, np, nn); while (cy) cy -= mpn_sub_n (cp, cp, np, nn); } else MPN_COPY (cp, cp + nn, nn); MPN_NORMALIZE (cp, nn); SIZ(c) = SIZ(c) < 0 ? -nn : nn; } #endif /* a <- b^e */ #define poly1 mpz_ui_pow_ui /* a = Dickson(e)(b) */ void poly2(a, b, e) mpz_t a, b; unsigned long int e; { int i; if (use_dickson==0) mpz_pow_ui(a, b, e); else { mpz_set_ui(a,1); for (i=e/2-1;i>=0;i--) { mpz_mul(a,a,b); mpz_mul(a,a,b); mpz_add(a,a,dick[i]); } if (e==3) mpz_mul(a,a,b); } } /* puts in l[i], i=0..k the coefficients of Dickson(k,1,x) if k is odd, l[0] contains coeff(1), l[1] contains coeff(3), ... if k is even l[0] contains coeff(0), l[1] contains coeff(2), ... */ void set_dickson(l, k) mpz_t *l; int k; { int i,j; mpz_t *t; t = (mpz_t*) malloc((k/2+1)*sizeof(mpz_t)); for (i=0;i<=k/2;i++) { mpz_init(t[i]); mpz_init(l[i]); } mpz_set_ui(l[0], 1); /* l = Dickson(1,1,x) = x */ mpz_set_si(t[0], -2); mpz_set_ui(t[1], 1); for (i=3;i<=k;i++) { if (i%2) { /* puts result in l */ for (j=0;j=i;j--) mpz_sub(ll[j], ll[j], ll[j-1]); for (j=0;j<=e&&res==0;j++) { if (j>=1) { mpz_set(r, ll[j]); mpz_set_ui(lx[j], 0); for (k=j-1;k>=0&&res==0;k--) if (mpz_cmp(r, ll[k])>=0) { /* compute difference wrt ll[j-1] */ mpz_tdiv_qr(q, r, r, ll[k]); /* r = q*ll[k]+r' */ res = multiplyW2(p,lx[j+1],ly[j+1],lx[k],ly[k],q,n,a,tx,ty); if (mpz_cmp_ui(lx[j], 0)) res = res || addW(p,lx[j],ly[j],lx[j+1],ly[j+1],lx[j],ly[j],n,tx,ty); else { mpz_set(lx[j], lx[j+1]); mpz_set(ly[j], ly[j+1]); } } if (mpz_cmp_ui(r, 0)) { res = multiplyW2(p,lx[j+1],ly[j+1],x,y,r,n,a,tx,ty); res = res || addW(p,lx[j],ly[j],lx[j+1],ly[j+1],lx[j],ly[j],n,tx,ty); } } else res=multiplyW2(p,lx[j],ly[j],x,y,ll[j],n,a,tx,ty); } mpz_clear(tx); mpz_clear(ty); mpz_clear(q); mpz_clear(r); for (j=0;j<=e+1;j++) mpz_clear(ll[j]); free(ll); if (verbose) printf("initialization of table of differences took %dms, %d muls and %d gcdexts\n",cputime()-st,mul-mul0,gcdexts-gcdext0); return res; } /* (x1:y1) <- q*(x:y) where q is a small integer */ int multiplyW(p,x1,y1,x,y,q,n,a,u,v) mpz_t p,x1,y1,x,y,n,a,u,v; unsigned int q; { unsigned int j,r,restore; mpz_t x2,y2; restore=(x1==x); if (restore) { mpz_init(x2); mpz_init(y2); x1=x2; y1=y2; } for (r=q,j=1;r!=1;r/=2,j<<=1); j >>= 1; r=duplicateW(p,x1,y1,x,y,n,a,u,v); if (r) return(r); if (q&j) r=addW(p,x1,y1,x1,y1,x,y,n,u,v); if (r) return(r); j >>= 1; while (j!=0) { if (duplicateW(p,x1,y1,x1,y1,n,a,u,v)) return(1); if (q&j) if (addW(p,x1,y1,x1,y1,x,y,n,u,v)) return(1); j >>= 1; } if (restore) { mpz_set(x,x1); mpz_set(y,y1); mpz_clear(x2); mpz_clear(y2); } return(0); } #define getbit(x,i) (PTR(x)[i/mp_bits_per_limb] & ((mp_limb_t)1<<(i%mp_bits_per_limb))) /* (x1:y1) <- q*(x:y) where q is a large integer */ int multiplyW2(p,x1,y1,x,y,q,n,a,u,v) mpz_t p,x1,y1,x,y,n,a,u,v,q; { int j,neg=0; if (mpz_cmp_ui(q,0)<0) { neg=1; mpz_neg(q,q); } if (mpz_cmp_ui(q,1)==0) { mpz_set(x1,x); mpz_set(y1,y); goto exit_multiplyW2; } j=mpz_sizeinbase(q,2)-2; if (duplicateW(p,x1,y1,x,y,n,a,u,v)) return 1; if (getbit(q,j) && addW(p,x1,y1,x1,y1,x,y,n,u,v)) return 1; j--; while (j>=0) { if (duplicateW(p,x1,y1,x1,y1,n,a,u,v)) return 1; if (getbit(q,j) && addW(p,x1,y1,x1,y1,x,y,n,u,v)) return 1; j--; } exit_multiplyW2: if (neg) { mpz_neg(y1,y1); mpz_neg(q,q); } return 0; } /* (x,y) can be identical to (x1,y1) */ int duplicateW(p,x1,y1,x,y,n,a,u,v) mpz_t p,x1,y1,x,y,n,a,u,v; { mpz_mul_ui(u,y,2); mpz_gcdext(p,v,NULL,u,initial_n); if (mpz_cmp_ui(p,1)!=0) return(1); mpz_mul(u,x,x); mpz_mul_ui(u,u,3); mpz_add(u,u,a); mod(u,u,n); mpz_mulmod(p,u,v,n); mpz_mul(u,p,p); mpz_mul_ui(v,x,2); mpz_sub(u,u,v); mod(u,u,n); mpz_sub(v,x,u); mpz_mul(v,v,p); mpz_sub(y1,v,y); mod(y1,y1,n); mpz_set(x1,u); mul+=4; gcdexts++; return(0); } /* performs the following loop with only one gcdext, using Montgomery's trick: for (j=0;j1 (3 muls for e=1) */ int addWn(p,x,y,n,u,v,e) mpz_t p,*x,*y,n,*u,*v; int e; { int j; mpz_sub(u[e-1],x[e],x[e-1]); mpz_set(v[e-1],u[e-1]); for (j=e-2;j>=0;j--) { mpz_sub(u[j],x[j+1],x[j]); mpz_mulmod(v[j],u[j],v[j+1],n); /* v[j] = u[j]*u[j+1]*...*u[e-1] */ mul++; } mpz_gcdext(p,v[e],NULL,v[0],initial_n); if (mpz_cmp_ui(p,1)!=0) return(1); gcdexts++; for (j=0;j=1) { for (i=n-2*D;i>=0;i-=2*D) { /* 3D copies is expensive: perhaps we could save some? */ for (j=0;j<2*D;j++) mpz_set(T[j], G[i+j]); /* divide G[i..i+2D-1] by F[d][i..i+D-1] and put rem in place */ RecursiveDivision(T+2*D, G+i, F[d]+i, D, T+3*D); /* divide by F[d][i+D..i+2D-1] and store remainder in G[i+D..i+2D-1] */ RecursiveDivision(T+2*D, T, F[d]+D+i, D, T+3*D); for (j=0;j