I'm trying to build a website where you have a 28x28 canvas on which you draw a number. This is all handled through classic HTML, CSS and JavaScript. However, once the image is drawn, I want to store it as an array (a drawn pixel is 0, and an undrawn pixel is 255) and pass it to a Python algorithm I have, which uses the k-nearest neighbors algorithm to try to determine which number it is.
All the code is already done; the only issue I'm having is getting that JavaScript array into my PyScript code.
I'm not proficient in JavaScript, so I'm having trouble writing my own code with it, but I think all the code is written except for the communication between JS and PyScript.
CODE IN HTML
<html>
<head>
<title>My OCR project</title>
<link rel="stylesheet" href = "https://pyscript.net/alpha/pyscript.css"/>
<script defer src="https://pyscript.net/alpha/pyscript.js"></script>
<link rel="stylesheet" href="style.css">
<script src="main.js"></script>
</head>
<body>
<h1 align="center">Optical Character Recognition</h1>
<p align = "center">Este proyecto está hecho por Luca Siegel usando una mezcla de Python, Pyscript, HTML, JavaScript y CSS.
El objetivo es dibujar un numero de 28x28 pixeles en el canvas y que un algoritmo de machine learning llamado knn
(k-nearest-neighbors) sea capaz de distinguirlo comparándolo con la database de MNIST. Actualmente hay una probabilidad de acierto
de alrededor del 80%.
</p>
<canvas id="gridCanvas" width="420px" height="420px"></canvas>
<button id="clearButton">Limpiar</button>
<button id="processButton">Procesar</button>
<py-script >
import time
# Folder holding the dataset files and the folder used for the test PNGs.
# NOTE(review): these are absolute Windows paths; code inside <py-script>
# presumably runs in the browser and may not be able to open them - confirm
# how the files are made available to the page.
DATA_DIR = r"C:/Users/lucas/Downloads/OCR/"
TEST_DIR = r"C:/Users/lucas/Downloads/OCR/test/"
# Full paths of the test/train image and label files, built from DATA_DIR.
TEST_DATA_FILENAME = DATA_DIR + "t10k-images.idx3-ubyte"
TEST_LABELS_FILENAME = DATA_DIR + "t10k-labels.idx1-ubyte"
TRAIN_DATA_FILENAME = DATA_DIR + "train-images.idx3-ubyte"
TRAIN_LABELS_FILENAME = DATA_DIR + "train-labels.idx1-ubyte"
# Reference timestamp taken when the script starts; main() subtracts it from
# its own end time to report the elapsed seconds.
start_time = time.time()
# When true, the imaging imports and helpers that follow are enabled and
# main() takes its debug branch.
DEBUG = True
if DEBUG:
    # Imaging dependencies are only needed by the debug helpers below, so
    # they are imported (and the helpers defined) only in debug mode.
    # NOTE(review): the nesting of both helpers under ``if DEBUG:`` is
    # reconstructed from a paste that lost its indentation - confirm.
    from PIL import Image
    import numpy as np

    def read_image(path):
        """Load the image at *path* and return it as an array of 8-bit
        grayscale pixel values."""
        return np.asarray(Image.open(path).convert("L"))

    def write_image(image, path):
        """Save *image* (rows of pixel values) to *path* as a grayscale
        image."""
        img = Image.fromarray(np.array(image), "L")
        img.save(path)
def bytes_to_int(byte_data):
    """Convert a pixel or label value to an ``int``.

    Args:
        byte_data: Either a bytes-like object, interpreted as a big-endian
            unsigned integer, or a plain ``int``, which is returned as-is so
            that values which are already numeric (for example a list of
            0/255 pixels) can flow through the same code path.

    Returns:
        The integer value.
    """
    if isinstance(byte_data, int):
        # Already numeric: int.from_bytes would reject it with a TypeError.
        return byte_data
    return int.from_bytes(byte_data, "big")
def read_labels(filename, n_max_labels=None):
    """Read the labels stored in a binary label file.

    The file is expected to start with a 4-byte field that is skipped,
    followed by a 4-byte big-endian label count and then one byte per label.

    Args:
        filename: Path of the label file.
        n_max_labels: Optional cap on how many labels to return. A falsy
            value (``None`` or ``0``) returns every label the header
            declares. A cap larger than the declared count is clamped to it.

    Returns:
        A list of one-byte ``bytes`` objects, one per label.

    Raises:
        ValueError: If the file ends before the expected number of labels.
    """
    with open(filename, "rb") as f:  # binary mode: the file is raw bytes
        _ = f.read(4)  # leading 4-byte field, not needed here
        # The next 4 bytes hold the number of labels in the file.
        n_labels = int.from_bytes(f.read(4), "big")
        if n_max_labels:
            # Never request more labels than the file declares (and never a
            # negative amount, which would make read() slurp the whole file).
            n_labels = max(0, min(n_labels, n_max_labels))
        # One buffered read instead of one read() call per label.
        data = f.read(n_labels)
    if len(data) < n_labels:
        raise ValueError(
            f"{filename}: expected {n_labels} labels, found {len(data)}"
        )
    # Slice (rather than index) so every label stays a 1-byte bytes object.
    return [data[i:i + 1] for i in range(n_labels)]
count = 0  # running total of pixels read by read_images()


def read_images(filename, n_max_images=None):
    """Read the images stored in a binary image file.

    The file is expected to start with a 4-byte field that is skipped,
    followed by three 4-byte big-endian integers (image count, rows,
    columns) and then one byte per pixel, image after image, row after row.

    Args:
        filename: Path of the image file.
        n_max_images: Optional cap on how many images to return. A falsy
            value (``None`` or ``0``) returns every image the header
            declares. A cap larger than the declared count is clamped to it.

    Returns:
        A list of images; each image is a list of rows and each row a list
        of one-byte ``bytes`` objects (one per pixel).

    Raises:
        ValueError: If the file ends before the expected pixel data.

    Side effects:
        Adds the number of pixels read to the module-level ``count``.
    """
    global count
    with open(filename, "rb") as f:  # binary mode: the file is raw bytes
        _ = f.read(4)  # leading 4-byte field, not needed here
        # The next 12 bytes hold the image count, row count and column count.
        n_images = int.from_bytes(f.read(4), "big")
        if n_max_images:
            # Never request more images than the file declares.
            n_images = max(0, min(n_images, n_max_images))
        n_rows = int.from_bytes(f.read(4), "big")
        n_columns = int.from_bytes(f.read(4), "big")
        image_size = n_rows * n_columns
        # One buffered read instead of one read() call per pixel.
        data = f.read(n_images * image_size)
    if len(data) < n_images * image_size:
        raise ValueError(
            f"{filename}: expected {n_images * image_size} pixel bytes, "
            f"found {len(data)}"
        )
    count += n_images * image_size

    images = []
    for image_idx in range(n_images):
        base = image_idx * image_size
        image = []
        for row_idx in range(n_rows):
            start = base + row_idx * n_columns
            # Slice so every pixel stays a 1-byte bytes object.
            image.append([data[p:p + 1] for p in range(start, start + n_columns)])
        images.append(image)
    return images
def aplanar_lista(l):
    """Flatten one level of nesting.

    Args:
        l: An iterable of iterables (for an image: a list of pixel rows).

    Returns:
        A single flat list with the inner elements in their original order.
    """
    return [pixel for row in l for pixel in row]
def pasar_lista_unidimensional(X):
    """Flatten every sample in *X*.

    Args:
        X: An iterable of samples, each a list of rows.

    Returns:
        A list with one flat list per sample, produced by ``aplanar_lista``.
    """
    return [aplanar_lista(sample) for sample in X]
def dist(x, y):
    """Return the Euclidean distance between two flattened samples.

    Args:
        x: Sequence of pixel values accepted by ``bytes_to_int``.
        y: Sequence of pixel values, paired element-wise with *x*.

    Returns:
        The square root of the summed squared pixel differences. Pairs are
        formed with ``zip``, so surplus trailing elements of the longer
        sequence are ignored.
    """
    squared_error = sum(
        (bytes_to_int(x_i) - bytes_to_int(y_i)) ** 2 for x_i, y_i in zip(x, y)
    )
    return squared_error ** 0.5
def distancia_entre_samples(X_train, test_sample):
    """Compute the distance from *test_sample* to every training sample.

    Args:
        X_train: Iterable of flattened training samples.
        test_sample: The flattened sample to compare against.

    Returns:
        A list of ``dist`` results, in the same order as *X_train*.
    """
    return [dist(train_sample, test_sample) for train_sample in X_train]
def most_frequent_element(list):
    """Return the value that occurs most often in *list*.

    When several values are tied, the one that appears first in *list* wins.

    Raises:
        ValueError: If *list* is empty.
    """
    # NOTE: the parameter shadows the built-in ``list``; the name is kept so
    # that existing keyword callers keep working.
    return max(list, key=list.count)
def knn(X_train, y_train, X_test, y_test, k=3):
    """Classify every sample in *X_test* with k-nearest-neighbours.

    Args:
        X_train: Flattened training samples.
        y_train: Labels of the training samples, index-aligned with
            *X_train*; each is converted with ``bytes_to_int``.
        X_test: Flattened samples to classify.
        y_test: Not used by the computation; kept for signature
            compatibility.
        k: How many nearest training samples vote on each prediction.

    Returns:
        A list with one predicted integer label per test sample.
    """
    y_pred = []  # predictions for X_test, in order
    for test_sample in X_test:
        # Distance from this sample to every training sample.
        training_distances = distancia_entre_samples(X_train, test_sample)
        # Training indices ordered from nearest to farthest (stable sort, so
        # equal distances keep their original order).
        nearest_indices = sorted(
            range(len(training_distances)), key=training_distances.__getitem__
        )
        # Labels of the k closest training samples.
        candidates = [bytes_to_int(y_train[idx]) for idx in nearest_indices[:k]]
        y_pred.append(most_frequent_element(candidates))
    return y_pred
def main():
    """Load the data, classify the test input with k-NN and print the result.

    In debug mode the loaded test images are written out as PNG files and the
    test set is then replaced by the single image ``mitest.png``.
    """
    # "X" is the dataset and "y" the labels assigned to it.
    X_train = read_images(TRAIN_DATA_FILENAME, 1000)
    X_test = read_images(TEST_DATA_FILENAME, 500)
    y_train = read_labels(TRAIN_LABELS_FILENAME, 1000)
    y_test = read_labels(TEST_LABELS_FILENAME, 500)
    if DEBUG:
        for idx, test_sample in enumerate(X_test):
            write_image(test_sample, f"{TEST_DIR}{idx}.png")
        # Feed in our own image instead of the loaded test set.
        # NOTE(review): placed inside the debug branch because read_image is
        # only defined in debug mode; the original indentation was lost.
        X_test = [read_image(f"{TEST_DIR}mitest.png")]
        print(X_test)
    # Turn every 2-D image into a flat list of pixels.
    X_train = pasar_lista_unidimensional(X_train)
    X_test = pasar_lista_unidimensional(X_test)
    print(X_test)
    y_pred = knn(X_train, y_train, X_test, y_test, 5)
    # Accuracy only makes sense when each prediction has a matching label;
    # with a single custom image there is no label to compare against.
    if y_test and len(y_pred) == len(y_test):
        accuracy = sum(
            pred == bytes_to_int(label) for pred, label in zip(y_pred, y_test)
        ) / len(y_test)
        print("Accuracy: ", accuracy)
    end_time = time.time()
    print("The number you have just written is: " ,y_pred, "Time that it took to guess:", round(end_time-start_time,5), " Seconds")
    print("Number of iterations: ", count)


if __name__ == "__main__":
    main()
</py-script>
</body>
</html>
CODE IN JAVASCRIPT
document.addEventListener('DOMContentLoaded', function() {
const canvas = document.getElementById('gridCanvas');
const ctx = canvas.getContext('2d');
const gridSize = 28;
const cellSize = canvas.width / gridSize;
let isDrawing = false;
// Draw the grid lines that separate the cells.
function drawGrid() {
    ctx.strokeStyle = '#ccc';
    ctx.beginPath();
    for (let line = 1; line < gridSize; line++) {
        const offset = line * cellSize;
        // Vertical line
        ctx.moveTo(offset, 0);
        ctx.lineTo(offset, canvas.height);
        // Horizontal line
        ctx.moveTo(0, offset);
        ctx.lineTo(canvas.width, offset);
    }
    ctx.stroke();
}
// Erase the whole canvas, then repaint the empty grid.
function clearGrid() {
    const { width, height } = canvas;
    ctx.clearRect(0, 0, width, height);
    drawGrid();
}
/**
 * Read the canvas back as a gridSize x gridSize matrix.
 *
 * Each entry is dec2bin(255) for a background (undrawn) cell and dec2bin(0)
 * for a drawn cell.
 * NOTE(review): dec2bin yields strings of binary digits ("11111111" / "0"),
 * not the numbers 255 / 0 - confirm that is what the consumer expects.
 *
 * @returns {string[][]} Rows of cell values, top row first.
 */
function getGridValues() {
    // Read the whole canvas once instead of issuing one call per cell.
    const snapshot = ctx.getImageData(0, 0, canvas.width, canvas.height);
    const pixels = snapshot.data;
    const gridValues = [];
    for (let y = 0; y < gridSize; y++) {
        const row = [];
        for (let x = 0; x < gridSize; x++) {
            // Sample the middle of the cell: its top-left corner lies on the
            // grid lines, so it reflects the line colour, not the cell.
            const px = Math.floor(x * cellSize + cellSize / 2);
            const py = Math.floor(y * cellSize + cellSize / 2);
            const offset = (py * snapshot.width + px) * 4;
            const red = pixels[offset];
            const green = pixels[offset + 1];
            const blue = pixels[offset + 2];
            const alpha = pixels[offset + 3];
            // An untouched canvas pixel is transparent black (all zeros), so
            // alpha has to be checked before judging brightness.
            const isBackground = alpha === 0 || red + green + blue > 255 * 3 / 2;
            row.push(dec2bin(isBackground ? 255 : 0));
        }
        gridValues.push(row);
    }
    return gridValues;
}
/**
 * Download the drawing as a gridSize x gridSize PNG named "grid.png" and
 * hand the grid values to sendGridValues() when such a function exists.
 */
function downloadImage() {
    const imageDataURL = canvas.toDataURL('image/png', 1.0);
    const image = new Image();
    // Attach the handler before setting src so the load event cannot be missed.
    image.onload = function() {
        const resizedCanvas = document.createElement('canvas');
        const resizedCtx = resizedCanvas.getContext('2d');
        // Set the new size
        resizedCanvas.width = gridSize;
        resizedCanvas.height = gridSize;
        // Scale the full drawing area down to gridSize x gridSize
        resizedCtx.drawImage(image, 0, 0, gridSize * cellSize, gridSize * cellSize, 0, 0, gridSize, gridSize);
        // Download the resized image through a temporary link
        const resizedDataURL = resizedCanvas.toDataURL('image/png');
        const a = document.createElement('a');
        a.href = resizedDataURL;
        a.download = 'grid.png';
        document.body.appendChild(a);
        a.click();
        document.body.removeChild(a);
    };
    image.src = imageDataURL;

    const values = getGridValues();
    // sendGridValues is not defined in this script; calling it unguarded
    // throws a ReferenceError on every click. typeof is safe on undeclared names.
    if (typeof sendGridValues === 'function') {
        sendGridValues(values);
    } else {
        console.warn('sendGridValues is not defined; grid values were not sent.');
    }
}
// Return the binary-digit string of `dec`, read as an unsigned 32-bit integer.
function dec2bin(dec) {
    const unsigned = dec >>> 0;
    return unsigned.toString(2);
}
// Mouse handler: while isDrawing is set, paint the cell under the pointer.
function handleDraw(event) {
    if (isDrawing) {
        const col = Math.floor(event.offsetX / cellSize);
        const row = Math.floor(event.offsetY / cellSize);
        // Fill the whole cell the pointer is over
        ctx.fillRect(col * cellSize, row * cellSize, cellSize, cellSize);
    }
}
// Draw the initial grid
drawGrid();
// Register the mouse events used to draw on the canvas
canvas.addEventListener('mousedown', (event) => {
    isDrawing = true;
    // Paint the pressed cell right away so a click without any mouse
    // movement still marks a pixel.
    handleDraw(event);
});
canvas.addEventListener('mousemove', handleDraw);
canvas.addEventListener('mouseup', () => {
    isDrawing = false;
});
canvas.addEventListener('mouseleave', () => {
    // Stop drawing when the pointer leaves the canvas.
    isDrawing = false;
});
document.getElementById('clearButton').addEventListener('click', clearGrid);
document.getElementById('processButton').addEventListener('click', downloadImage);
});
JSON and either the submit method or the fetch + POST method. Submitting to an endpoint is independent of the backend language.