# for reproducibility
set.seed(123)
Semicircle Problem with Simulation
If we have four ducks swimming in a circle and their locations are random and independent, what is the probability that all four of the ducks are in the same half of the circle? In this project, I solved this problem through simulation in R. I found that the probability of this to happen is around 50%.
We first generate the duck locations
# create a function to genereate random location in a circle
<- function(n, radius){
generate_point_in_circle <- data.frame()
output while (nrow(output) < n) {
<- runif(2, min = -radius, max = radius)
iteration if (iteration[1]^2 + iteration[2]^2 <= radius^2) {
<- rbind(output, iteration)
output
}
}colnames(output) <- c("x", "y")
return(output)
}
# generate 10000 samples of set of four points
<- 10000
n <- generate_point_in_circle(n, 1)
a <- generate_point_in_circle(n, 1)
b <- generate_point_in_circle(n, 1)
c <- generate_point_in_circle(n, 1)
d
par(mfrow = c(2,2))
plot(a[, "x"], a[, "y"], asp=1,
main = "Locations for point a", xlab = "x", ylab = "y")
plot(b[, "x"], b[, "y"], asp=1,
main = "Locations for point b", xlab = "x", ylab = "y")
plot(c[, "x"], c[, "y"], asp=1,
main = "Locations for point c", xlab = "x", ylab = "y")
plot(d[, "x"], d[, "y"], asp=1,
main = "Locations for point d", xlab = "x", ylab = "y")
Then we check for each set of points whether they fall within the same semicircle or not.
# create variable to record the number of TRUE happens
<- 0
count_ture for (i in 1:n) {
<- 0
cond # check whether b, c, d falls within the semicircle created by a and origin (0,0)
if (b[i,"y"]*a[i,"x"]-b[i,"x"]*a[i,"y"] > 0 &
"y"]*a[i,"x"]-c[i,"x"]*a[i,"y"] > 0 &
c[i,"y"]*a[i,"x"]-d[i,"x"]*a[i,"y"] > 0) {
d[i,<- cond + 1
cond
}if (b[i,"y"]*a[i,"x"]-b[i,"x"]*a[i,"y"] < 0 &
"y"]*a[i,"x"]-c[i,"x"]*a[i,"y"] < 0 &
c[i,"y"]*a[i,"x"]-d[i,"x"]*a[i,"y"] < 0) {
d[i,<- cond + 1
cond
}# check whether a, c, d falls within the semicircle created by b and origin (0,0)
if (a[i,"y"]*b[i,"x"]-a[i,"x"]*b[i,"y"] > 0 &
"y"]*b[i,"x"]-c[i,"x"]*b[i,"y"] > 0 &
c[i,"y"]*b[i,"x"]-d[i,"x"]*b[i,"y"] > 0) {
d[i,<- cond + 1
cond
}if (a[i,"y"]*b[i,"x"]-a[i,"x"]*b[i,"y"] < 0 &
"y"]*b[i,"x"]-c[i,"x"]*b[i,"y"] < 0 &
c[i,"y"]*b[i,"x"]-d[i,"x"]*b[i,"y"] < 0) {
d[i,<- cond + 1
cond
}# check whether a, b, d falls within the semicircle created by c and origin (0,0)
if (a[i,"y"]*c[i,"x"]-a[i,"x"]*c[i,"y"] > 0 &
"y"]*c[i,"x"]-b[i,"x"]*c[i,"y"] > 0 &
b[i,"y"]*c[i,"x"]-d[i,"x"]*c[i,"y"] > 0) {
d[i,<- cond + 1
cond
}if (a[i,"y"]*c[i,"x"]-a[i,"x"]*c[i,"y"] < 0 &
"y"]*c[i,"x"]-b[i,"x"]*c[i,"y"] < 0 &
b[i,"y"]*c[i,"x"]-d[i,"x"]*c[i,"y"] < 0) {
d[i,<- cond + 1
cond
}# check whether a, b, c falls within the semicircle created by d and origin (0,0)
if (a[i,"y"]*d[i,"x"]-a[i,"x"]*d[i,"y"] > 0 &
"y"]*d[i,"x"]-b[i,"x"]*d[i,"y"] > 0 &
b[i,"y"]*d[i,"x"]-c[i,"x"]*d[i,"y"] > 0) {
c[i,<- cond + 1
cond
}if (a[i,"y"]*d[i,"x"]-a[i,"x"]*d[i,"y"] < 0 &
"y"]*d[i,"x"]-b[i,"x"]*d[i,"y"] < 0 &
b[i,"y"]*d[i,"x"]-c[i,"x"]*d[i,"y"] < 0) {
c[i,<- cond + 1
cond
}# if any of the 4 conditions is met, then the four points are in the same semicircle
if (cond >= 1) {
<- count_ture + 1
count_ture
}
}
# calculate the proportion
cat("Proportion of four ducks fall within the same semicircle \nwith", n,
"simulations equals =", count_ture*100/n, "%")
Proportion of four ducks fall within the same semicircle
with 10000 simulations equals = 50.82 %
Based on the simulation we can see that the proportion is 50.82% when we have a large sample size. Then I want to see the changing trend in the average proportion with iteration increases. I first create a function to replicate the process above.
<- function(n) {
ducks_simulation <- generate_point_in_circle(n, 1)
a <- generate_point_in_circle(n, 1)
b <- generate_point_in_circle(n, 1)
c <- generate_point_in_circle(n, 1)
d <- 0
count_ture for (i in 1:n) {
<- 0
cond # check whether b, c, d falls within the semicircle created by a and origin (0,0)
if (b[i,"y"]*a[i,"x"]-b[i,"x"]*a[i,"y"] > 0 &
"y"]*a[i,"x"]-c[i,"x"]*a[i,"y"] > 0 &
c[i,"y"]*a[i,"x"]-d[i,"x"]*a[i,"y"] > 0) {
d[i,<- cond + 1
cond
}if (b[i,"y"]*a[i,"x"]-b[i,"x"]*a[i,"y"] < 0 &
"y"]*a[i,"x"]-c[i,"x"]*a[i,"y"] < 0 &
c[i,"y"]*a[i,"x"]-d[i,"x"]*a[i,"y"] < 0) {
d[i,<- cond + 1
cond
}# check whether a, c, d falls within the semicircle created by b and origin (0,0)
if (a[i,"y"]*b[i,"x"]-a[i,"x"]*b[i,"y"] > 0 &
"y"]*b[i,"x"]-c[i,"x"]*b[i,"y"] > 0 &
c[i,"y"]*b[i,"x"]-d[i,"x"]*b[i,"y"] > 0) {
d[i,<- cond + 1
cond
}if (a[i,"y"]*b[i,"x"]-a[i,"x"]*b[i,"y"] < 0 &
"y"]*b[i,"x"]-c[i,"x"]*b[i,"y"] < 0 &
c[i,"y"]*b[i,"x"]-d[i,"x"]*b[i,"y"] < 0) {
d[i,<- cond + 1
cond
}# check whether a, b, d falls within the semicircle created by c and origin (0,0)
if (a[i,"y"]*c[i,"x"]-a[i,"x"]*c[i,"y"] > 0 &
"y"]*c[i,"x"]-b[i,"x"]*c[i,"y"] > 0 &
b[i,"y"]*c[i,"x"]-d[i,"x"]*c[i,"y"] > 0) {
d[i,<- cond + 1
cond
}if (a[i,"y"]*c[i,"x"]-a[i,"x"]*c[i,"y"] < 0 &
"y"]*c[i,"x"]-b[i,"x"]*c[i,"y"] < 0 &
b[i,"y"]*c[i,"x"]-d[i,"x"]*c[i,"y"] < 0) {
d[i,<- cond + 1
cond
}# check whether a, b, c falls within the semicircle created by d and origin (0,0)
if (a[i,"y"]*d[i,"x"]-a[i,"x"]*d[i,"y"] > 0 &
"y"]*d[i,"x"]-b[i,"x"]*d[i,"y"] > 0 &
b[i,"y"]*d[i,"x"]-c[i,"x"]*d[i,"y"] > 0) {
c[i,<- cond + 1
cond
}if (a[i,"y"]*d[i,"x"]-a[i,"x"]*d[i,"y"] < 0 &
"y"]*d[i,"x"]-b[i,"x"]*d[i,"y"] < 0 &
b[i,"y"]*d[i,"x"]-c[i,"x"]*d[i,"y"] < 0) {
c[i,<- cond + 1
cond
}# if any of the 4 conditions is met, then the four points are in the same semicircle
if (cond >= 1) {
<- count_ture + 1
count_ture
}
}return(count_ture*100/n)
}
Use parallel package to do parallel computing with the function. Like above, still simulate for 10000 times.
library(parallel)
<- mclapply(rep(1, 10000), ducks_simulation)
simulation_output <- unlist(simulation_output) simulation_output
Visualize the average proportion trend with the increase of simulation times.
<- data.frame(matrix(nrow = 10000, ncol = 2))
pop_vis colnames(pop_vis) <- c("first_n_iteration", "average proportion")
for (i in 1:10000) {
<- c(i, mean(simulation_output[1:i]))
pop_vis[i,]
}
plot(pop_vis[, "first_n_iteration"], pop_vis[, "average proportion"],
main = "Average proportion of four ducks are in the same semicircle",
xlab = "Number of iteration", ylab = "Average proportion", type = "l",
xlim = c(0, 10000),
ylim = c(min(pop_vis[, "average proportion"]), max(pop_vis[, "average proportion"])))
abline(h = 50, col="lightblue")
The average proportion seems to quickly converge to 50%.
Click here if you want to access a PDF version of this post.