# -*- coding: utf-8 -*-
"""
Created on Wed Mar 18 23:47:55 2026

@author: scige
"""

import numpy as np
import pyopencl as cl
import numba
from numba import jit, prange
import time

# --- STEP 1: NUMBA FOR PRE-PROCESSING (CPU) ---
# parallel=True lets Numba split the prange loop across all CPU cores.
@jit(nopython=True, parallel=True)
def prepare_data_numba(n):
    """Build n float32 samples: sin(k) damped by exp(-k/n), for k in [0, n)."""
    out = np.empty(n, dtype=np.float32)
    for k in prange(n):
        # Stand-in for an expensive per-element CPU computation.
        out[k] = np.sin(k) * np.exp(-k / n)
    return out

# --- STEP 2: PYOPENCL FOR MASSIVE COMPUTE (GPU) ---
def gpu_compute(data_cpu):
    """Compute ``data_cpu**2 + 0.5`` element-wise on an OpenCL device.

    Parameters
    ----------
    data_cpu : np.ndarray
        Input samples. Coerced to a contiguous float32 array, since the
        kernel operates on 32-bit floats (no-op for float32 input).

    Returns
    -------
    np.ndarray
        float32 array with the same shape as the (coerced) input.
    """
    # Creating a context/queue and building the program are expensive
    # one-time operations; cache them on the function so repeated calls
    # (e.g. inside a timing loop) only pay for transfer + execution.
    if not hasattr(gpu_compute, "_prg"):
        ctx = cl.create_some_context()
        gpu_compute._ctx = ctx
        gpu_compute._queue = cl.CommandQueue(ctx)
        kernel = """
    __kernel void gpu_task(__global const float *a, __global float *res) {
        int i = get_global_id(0);
        res[i] = a[i] * a[i] + 0.5f; // Esempio di calcolo GPU
    }
    """
        gpu_compute._prg = cl.Program(ctx, kernel).build()
    ctx = gpu_compute._ctx
    queue = gpu_compute._queue
    prg = gpu_compute._prg

    # The kernel reads 32-bit floats: make sure the host buffer matches.
    data_cpu = np.ascontiguousarray(data_cpu, dtype=np.float32)

    # Allocate device buffers; the input is copied from the host array.
    mf = cl.mem_flags
    data_gpu = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=data_cpu)
    res_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=data_cpu.nbytes)

    # 1-D global size covering every element (works for any input rank,
    # since the kernel only uses get_global_id(0)).
    prg.gpu_task(queue, (data_cpu.size,), None, data_gpu, res_gpu)

    res_cpu = np.empty_like(data_cpu)
    # enqueue_copy blocks by default (is_blocking=True), so res_cpu is
    # fully populated when this returns.
    cl.enqueue_copy(queue, res_cpu, res_gpu)

    # Release device memory promptly rather than waiting for GC.
    data_gpu.release()
    res_gpu.release()
    return res_cpu

# --- EXECUTION ---
N = 99_000_000  # number of float32 samples (~396 MB)
REPS = 10       # timing repetitions per phase


def main():
    """Benchmark the CPU (Numba) phase and the GPU (OpenCL) phase separately."""
    # Warm-up: the first call to a @jit function pays the JIT compilation
    # cost; trigger it on a tiny input so it doesn't pollute the timing.
    prepare_data_numba(10)

    # CPU phase — perf_counter is monotonic and high-resolution, the
    # right clock for benchmarking (time.time() is wall-clock).
    start = time.perf_counter()
    for _ in range(REPS):
        data_prepped = prepare_data_numba(N)
    print(f"Time Numba (CPU): {time.perf_counter() - start:.4f}s")

    # GPU phase
    start = time.perf_counter()
    for _ in range(REPS):
        final_res = gpu_compute(data_prepped)
    print(f"Time OpenCL (GPU): {time.perf_counter() - start:.4f}s")
    return final_res


if __name__ == "__main__":
    main()