Tensorflow - Nan loss and constant accuracy when training

as the title says, I am trying to train a neural network to predict outcomes, and I can't figure out what is wrong with my model. I keep getting the exact same accuracy level, and the loss is Nan. I'm so confused... I have looked at other similar questions and still can't seem to get it working. My code for the model and training is below:

import numpy as np
import pandas as pd
import tensorflow as tf
import urllib.request as request
import matplotlib.pyplot as plt
from FlowersCustom import get_MY_data

def get_data():
    IRIS_TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
    IRIS_TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

    names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'species']
    train = pd.read_csv(IRIS_TRAIN_URL, names=names, skiprows=1)
    test = pd.read_csv(IRIS_TEST_URL, names=names, skiprows=1)

    # Train and test input data
    Xtrain = train.drop("species", axis=1)
    Xtest = test.drop("species", axis=1)

    # Encode target values into binary ('one-hot' style) representation
    ytrain = pd.get_dummies(train.species)
    ytest = pd.get_dummies(test.species)

    return Xtrain, Xtest, ytrain, ytest


def create_graph(hidden_nodes):
    # Reset the graph
    tf.reset_default_graph()

    # Placeholders for input and output data
    X = tf.placeholder(shape=Xtrain.shape, dtype=tf.float64, name='X')
    y = tf.placeholder(shape=ytrain.shape, dtype=tf.float64, name='y')

    # Variables for two group of weights between the three layers of the network
    print(Xtrain.shape, ytrain.shape)
    W1 = tf.Variable(np.random.rand(Xtrain.shape[1], hidden_nodes), dtype=tf.float64)
    W2 = tf.Variable(np.random.rand(hidden_nodes, ytrain.shape[1]), dtype=tf.float64)

    # Create the neural net graph
    A1 = tf.sigmoid(tf.matmul(X, W1))
    y_est = tf.sigmoid(tf.matmul(A1, W2))

    # Define a loss function
    deltas = tf.square(y_est - y)
    loss = tf.reduce_sum(deltas)

    # Define a train operation to minimize the loss
    # optimizer = tf.train.GradientDescentOptimizer(0.005)
    optimizer = tf.train.AdamOptimizer(0.001)
    opt = optimizer.minimize(loss)

    return opt, X, y, loss, W1, W2, y_est


def train_model(hidden_nodes, num_iters, opt, X, y, loss, W1, W2, y_est):
    # Initialize variables and run session
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    losses = []

    # Go through num_iters iterations
    for i in range(num_iters):
        sess.run(opt, feed_dict={X: Xtrain, y: ytrain})
        local_loss = sess.run(loss, feed_dict={X: Xtrain.values, y: ytrain.values})
        losses.append(local_loss)
        weights1 = sess.run(W1)
        weights2 = sess.run(W2)

        y_est_np = sess.run(y_est, feed_dict={X: Xtrain.values, y: ytrain.values})
        correct = [estimate.argmax(axis=0) == target.argmax(axis=0)
                   for estimate, target in zip(y_est_np, ytrain.values)]
        acc = 100 * sum(correct) / len(correct)

        if i % 10 == 0:
            print('Epoch: %d, Accuracy: %.2f, Loss: %.2f' % (i, acc, local_loss))

    print("loss (hidden nodes: %d, iterations: %d): %.2f" % (hidden_nodes, num_iters, losses[-1]))
    sess.close()
    return weights1, weights2


def test_accuracy(weights1, weights2):
    X = tf.placeholder(shape=Xtest.shape, dtype=tf.float64, name='X')
    y = tf.placeholder(shape=ytest.shape, dtype=tf.float64, name='y')
    W1 = tf.Variable(weights1)
    W2 = tf.Variable(weights2)
    A1 = tf.sigmoid(tf.matmul(X, W1))
    y_est = tf.sigmoid(tf.matmul(A1, W2))

    # Calculate the predicted outputs
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        y_est_np = sess.run(y_est, feed_dict={X: Xtest, y: ytest})

    # Calculate the prediction accuracy
    correct = [estimate.argmax(axis=0) == target.argmax(axis=0)
               for estimate, target in zip(y_est_np, ytest.values)]
    accuracy = 100 * sum(correct) / len(correct)
    print('final accuracy: %.2f%%' % accuracy)


def get_inputs_and_outputs(train, test, output_column_name):
    Xtrain = train.drop(output_column_name, axis=1)
    Xtest = test.drop(output_column_name, axis=1)
    ytrain = pd.get_dummies(getattr(train, output_column_name))
    ytest = pd.get_dummies(getattr(test, output_column_name))

    return Xtrain, Xtest, ytrain, ytest




if __name__ == '__main__':

    train, test = get_MY_data('output')

    Xtrain, Xtest, ytrain, ytest = get_inputs_and_outputs(train, test, 'output')#get_data()
    # Xtrain, Xtest, ytrain, ytest = get_data()

    hidden_layers = 10
    num_epochs = 500


    opt, X, y, loss, W1, W2, y_est = create_graph(hidden_layers)
    w1, w2 = train_model(hidden_layers, num_epochs, opt, X, y, loss, W1, W2, y_est)
    # test_accuracy(w1, w2)

Here is a screenshot of what the training is printing out: enter image description here

And this is a screenshot of the Pandas Dataframe that I am using for the input data (5 columns of floats): enter image description here

And finally, here is the Pandas Dataframe that I am using for the expected outputs (1 column of either -1 or 1): enter image description here